/*

Preschool Availability and Female Labor Force Participation: Evidence from Indonesia
Daniel Halim, Hillary C. Johnson, Elizaveta Perova

The World Bank
East Asia Pacific Gender Innovation Lab (EAPGIL)

June 2020

Objective: clean the raw IFLS data and construct the main outcome variables

*/
capture log close

******************************************************************************
* Part 1: create lists of unique districts in wave 2-5
******************************************************************************

* Note: the dta file is imported from KEMENDAGRI's excel file

*----------------------			1997		----------------------------*

use "$raw/District codes/kab_codes_93_09.dta",clear
* drop subsequent district codes
drop kabcode_des99 kabname_des99 ///
	kabcode_des01 kabname_des01 kabcode_des02 kabname_des02 kabcode_des03 ///
	kabname_des03 kabcode_jul07 kabname_jul07 kabcode_des07 kabname_des07 ///
	kabcode_jul08 kabname_jul08 kabcode_des08 kabname_des08 kabcode_apr09 ///
	kabname_apr09 kabcode_jun09 kabname_jun09 kabcode_agt09 kabname_agt09
* remove rows that share all same district names and codes in previous years
duplicates drop
* report few that are left
duplicates report kabcode_des97
* tag to identify in browser
duplicates tag kabcode_des97,gen(dup)

* manually drop districts that have same codes but different names in 1997
* since these districts will be traced back to the same district in 1993
drop if kabname_des97=="MANDAILING NATAL"
drop if kabname_des97=="TOBA SAMOSIR"
drop if kabname_des97=="JAKARTA UTARA"
drop if kabname_des97=="BENGKAYANG"
drop if kabname_des97=="LUWU UTARA"
drop if kabname_des97=="BOALEMO"
* drop duplicate identifier
drop dup
* report duplicates
duplicates report kabcode_des97
* remove province headings (these denote province names and not districts)
gen prov=mod(kabcode_des97,100)
drop if prov==0
drop prov
save "$clean/District codes/kab_codes_93_97.dta",replace


*----------------------			2000		----------------------------*
use "$raw/District codes/kab_codes_93_09.dta",clear
* drop subsequent district codes
drop kabcode_des01 kabname_des01 kabcode_des02 kabname_des02 kabcode_des03 ///
	kabname_des03 kabcode_jul07 kabname_jul07 kabcode_des07 kabname_des07 ///
	kabcode_jul08 kabname_jul08 kabcode_des08 kabname_des08 kabcode_apr09 ///
	kabname_apr09 kabcode_jun09 kabname_jun09 kabcode_agt09 kabname_agt09
* remove rows that share all same district names and codes in previous years
duplicates drop
* report few that are left
duplicates report kabcode_des99
* tag to identify in browser
duplicates tag kabcode_des99,gen(dup)

* manually drop districts that have same codes but different names in 1999
* since these districts will be traced back to the same district in 1993
drop if kabname_des99=="JAKARTA UTARA"
drop if kabname_des97=="SAMBAS" & kabname_des99=="BENGKAYANG"
* drop duplicate identifier
drop dup
* report duplicates
duplicates report kabcode_des99
* remove province headings (these denote province names and not districts)
gen prov=mod(kabcode_des99,100)
drop if prov==0
drop prov
save "$clean/District codes/kab_codes_93_99.dta",replace


*----------------------			2007		----------------------------*
use "$raw/District codes/kab_codes_93_09.dta",clear
* drop subsequent district codes
drop kabcode_jul08 kabname_jul08 kabcode_des08 kabname_des08 kabcode_apr09 ///
	kabname_apr09 kabcode_jun09 kabname_jun09 kabcode_agt09 kabname_agt09
* remove rows that share all same district names and codes in previous years
duplicates drop
* report few that are left
duplicates report kabcode_des07
save "$clean/District codes/kab_codes_93_07.dta",replace

*----------------------			2014		----------------------------*
/* Note:
	- BPS Master File Kabupaten does not yet extend to 2014
	- But IFLS5 contains individuals' geographic identifiers in 2014
	  using 2007 BPS codes
	- The same 2007 district list will be used to convert 2014 district
	  locations to 1993 BPS codes
*/

***************************************************************************
***PART 2: Clean IFLS HTRACK and PTRACK				                    ***
***************************************************************************

use "$ifls_raw2/htrack",clear
duplicates tag hhid93 hhid97,gen(dup)
drop if dup>0
drop dup
drop if missing(hhid97)
gen wave=2
save "$ifls_clean2/htrack",replace

use "$ifls_raw3/htrack",clear
duplicates tag hhid93 hhid97 hhid00,gen(dup)
drop if dup>0
drop dup
drop if missing(hhid00)
gen wave=3
save "$ifls_clean3/htrack",replace

use "$ifls_raw4/htrack",clear
duplicates tag hhid93 hhid97 hhid00 hhid07,gen(dup)
drop if dup>0
drop dup
drop if missing(hhid07)
gen wave=4
save "$ifls_clean4/htrack",replace

use "$ifls_raw5/htrack",clear
duplicates tag hhid93 hhid97 hhid00 hhid07 hhid14,gen(dup)
drop if dup>0
drop dup
drop if missing(hhid14)
gen wave=5
save "$ifls_clean5/htrack",replace


****** Remove duplicate pidlink from latest PTRACK
use "$ifls_raw5/ptrack",clear
duplicates drop pidlink,force
save "$ifls_clean5/ptrack",replace

***************************************************************************
*** PART 3: Obtain years of schooling and religion and person id of parents          ***
***************************************************************************

/*------------------------------------------------------
                            1993
--------------------------------------------------------*/

use "$ifls_raw1/bukkar2.dta", clear

* Construct variable years of schooling from level of education & grade completed
* unschooled
gen yos = 0 if ar16==1
* primary
replace yos = ar17 if ar16==2 & ar17<7
replace yos = 6 if ar16==2 & ar17==7
* middle school
forval i=3/4{
replace yos = 6 + ar17 if ar16==`i' & ar17<4
replace yos = 9 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* high school
forval i=5/6{
replace yos = 9 + ar17 if ar16==`i' & ar17<4
replace yos = 12 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* community college
replace yos = 12+ar17 if ar16==7 & ar17<3
replace yos = 14 if ar16==7 & (ar17>=3 & ar17<=7)
replace yos = 12+ar17 if ar16==8 & ar17<4
replace yos = 15 if ar16==8 & (ar17>=4 & ar17<=7)
* uni & post-grad
replace yos = 12+ar17 if ar16==9 & ar17<7
replace yos = 16 if ar16==9 & ar17==7
* note: uni & post-grad are not distinguished
* assume: complete uni (ar17==7) means complete bachelor
label var yos "Years of schooling"

keep yos hhid93 pid93 pidlink ar10 ar11 ar15 ar16 ar17
ren ar* ar*93
ren yos yos93

gen muslim93=ar15==1
label var muslim93 "Muslim"

save "$ifls_clean1/Education-All.dta",replace

/*------------------------------------------------------
                      1997
--------------------------------------------------------*/

use "$ifls_raw2/bk_ar1.dta", clear

* Construct variable years of schooling from level of education & grade completed
* unschooled
gen yos = 0 if ar16==1
* primary
replace yos = ar17 if ar16==2 & ar17<7
replace yos = 6 if ar16==2 & ar17==7
* middle school
forval i=3/4{
replace yos = 6 + ar17 if ar16==`i' & ar17<4
replace yos = 9 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* high school
forval i=5/6{
replace yos = 9 + ar17 if ar16==`i' & ar17<4
replace yos = 12 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* community college
replace yos = 12+ar17 if ar16==7 & ar17<3
replace yos = 14 if ar16==7 & (ar17>=3 & ar17<=7)
replace yos = 12+ar17 if ar16==8 & ar17<4
replace yos = 15 if ar16==8 & (ar17>=4 & ar17<=7)
* uni & post-grad
replace yos = 12+ar17 if ar16==9 & ar17<7
replace yos = 16 if ar16==9 & ar17==7
* note: uni & post-grad are not distinguished
* assume: complete uni (ar17==7) means complete bachelor
label var yos "Years of schooling"

keep yos hhid97 pid97 pidlink ar10 ar11 ar15 ar16 ar17
ren ar* ar*97
ren yos yos97

gen muslim97=ar15==1
label var muslim97 "Muslim"

save "$ifls_clean2/Education-All.dta",replace


/*------------------------------------------------------
                          2000
--------------------------------------------------------*/

use "$ifls_raw3/bk_ar1.dta", clear

* Construct variable years of schooling from level of education & grade completed
* unschooled
gen yos = 0 if ar16==1
* primary
replace yos = ar17 if (ar16==2|ar16==72) & ar17<7
replace yos = 6 if ar16==2 & ar17==7
* middle school
forval i=3/4{
replace yos = 6 + ar17 if (ar16==`i'|ar16==73) & ar17<4
replace yos = 9 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* high school
forval i=5/6{
replace yos = 9 + ar17 if (ar16==`i'|ar16==74) & ar17<4
replace yos = 12 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* community college
replace yos = 12+ar17 if ar16==60 & ar17<4
replace yos = 15 if ar16==60 & (ar17>=4 & ar17<=7)
* uni
replace yos = 12+ar17 if ar16==61 & ar17<5
replace yos = 16 if ar16==61 & (ar17>=5 & ar17<=7)
* masters
replace yos = 16+ar17 if ar16==62 & ar17<3
replace yos = 18 if ar16==62 & (ar17>=3 & ar17<=7)
* phd
replace yos = 18+ar17 if ar16==63 & ar17<6
replace yos = 23 if ar16==63 & (ar17>=6 & ar17<=7)
/* note: uni & post-grad are distinguished
		 for comparability purpose, assume that all PhDs have to go thru
		 2 years of masters + 5 years of PhD
*/
label var yos "Years of schooling"

keep yos hhid00 pid00 pidlink ar10 ar11 ar15 ar16 ar17
ren ar* ar*00
ren yos yos00

gen muslim00=ar15==1
label var muslim00 "Muslim"

save "$ifls_clean3/Education-All.dta",replace


/*------------------------------------------------------
                          2007
--------------------------------------------------------*/

use "$ifls_raw4/bk_ar1.dta", clear

* Construct variable years of schooling from level of education & grade completed
* unschooled
gen yos = 0 if ar16==1
* primary
replace yos = ar17 if (ar16==2|ar16==72) & ar17<7
replace yos = 6 if ar16==2 & ar17==7
* middle school
forval i=3/4{
replace yos = 6 + ar17 if (ar16==`i'|ar16==73) & ar17<4
replace yos = 9 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* high school
forval i=5/6{
replace yos = 9 + ar17 if (ar16==`i'|ar16==74) & ar17<4
replace yos = 12 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* community college
replace yos = 12+ar17 if ar16==60 & ar17<4
replace yos = 15 if ar16==60 & (ar17>=4 & ar17<=7)
* uni
replace yos = 12+ar17 if ar16==61 & ar17<5
replace yos = 16 if ar16==61 & (ar17>=5 & ar17<=7)
* masters
replace yos = 16+ar17 if ar16==62 & ar17<3
replace yos = 18 if ar16==62 & (ar17>=3 & ar17<=7)
* phd
replace yos = 18+ar17 if ar16==63 & ar17<6
replace yos = 23 if ar16==63 & (ar17>=6 & ar17<=7)
/* note: uni & post-grad are distinguished
		 for comparability purpose, assume that all PhDs have to go thru
		 2 years of masters + 5 years of PhD
*/
label var yos "Years of schooling"

keep yos hhid07 pid07 pidlink ar10 ar11 ar15 ar16 ar17
ren ar* ar*07
ren yos yos07

gen muslim07=ar15==1
label var muslim07 "Muslim"

save "$ifls_clean4/Education-All.dta",replace


/*------------------------------------------------------
                      2014
--------------------------------------------------------*/

use "$ifls_raw5/bk_ar1.dta", clear

* Construct variable years of schooling from level of education & grade completed
* unschooled
gen yos = 0 if ar16==1
* primary
replace yos = ar17 if (ar16==2|ar16==72) & ar17<7
replace yos = 6 if ar16==2 & ar17==7
* middle school
forval i=3/4{
replace yos = 6 + ar17 if (ar16==`i'|ar16==73) & ar17<4
replace yos = 9 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* high school
forval i=5/6{
replace yos = 9 + ar17 if (ar16==`i'|ar16==74) & ar17<4
replace yos = 12 if ar16==`i' & (ar17>=4 & ar17<=7)
}
* community college
replace yos = 12+ar17 if ar16==60 & ar17<4
replace yos = 15 if ar16==60 & (ar17>=4 & ar17<=7)
* uni
replace yos = 12+ar17 if ar16==61 & ar17<5
replace yos = 16 if ar16==61 & (ar17>=5 & ar17<=7)
* masters
replace yos = 16+ar17 if ar16==62 & ar17<3
replace yos = 18 if ar16==62 & (ar17>=3 & ar17<=7)
* phd
replace yos = 18+ar17 if ar16==63 & ar17<6
replace yos = 23 if ar16==63 & (ar17>=6 & ar17<=7)
/* note: uni & post-grad are distinguished
		 for comparability purpose, assume that all PhDs have to go thru
		 2 years of masters + 5 years of PhD
*/
label var yos "Years of schooling"

keep yos hhid14 pid14 pidlink ar10 ar11 ar15 ar16 ar17
ren ar* ar*14
ren yos yos14

gen muslim14=ar15==1
label var muslim14 "Muslim"

save "$ifls_clean5/Education-All.dta",replace

***************************************************************************
*** PART 4: Work history from each wave
***************************************************************************

***************************************************************************
*** PART 4.1: IFLS1
***************************************************************************

*** start with work history
use "$ifls_raw1/buk3tk3",clear

*** keep necessary variables
keep pidlink year tk28 occ32 tk35 tk38 tk36r1 tk37r1 tk33

*** rename variables
ren tk28 work
ren tk35 workstat
ren tk38 sidejob
ren occ32 occ

*** recode work
recode work (3=0) //(6 7 9 = .)
recode sidejob (3=0) //(6 7 8 9 = .)
*recode workstat (7 8 9 = .)

*** define label values
la def binary 1 "Yes" 0 "No"
la def workstat 1 "Self employed w/o help" ///
  2 "Self employed w/ temporary worker" ///
  3 "Self employed w/ permanent worker" 4 "Government worker" ///
  5 "Private worker" 6 "Unpaid family worker"

*** assign value labels
foreach v in work sidejob {
	la val `v' binary
}
la val workstat workstat

*** salary
ren tk36r1 wage_m
replace wage_m = . if wage_m>=99995

*** profit
ren tk37r1 profit_m
replace profit_m = . if profit_m>=99995

*** income
egen income_m = rowtotal(wage_m profit_m)
replace income_m = . if wage_m==. & profit_m==.

*** work hours
ren tk33 workhour
replace workhour = . if workhour>=95

* recode year
replace year = 1900+year if year<100

* reshape to wide
reshape wide work occ workstat sidejob wage_m profit_m income_m workhour, i(pidlink) j(year)

********************** 			merge with other modules

** merge with TK1
merge 1:1 pidlink using "$ifls_raw1/buk3tk1"
drop _merge

** merge with TK2
merge 1:1 pidlink using "$ifls_raw1/buk3tk2"
drop _merge

** merge with age
merge 1:1 pidlink using "$ifls_raw1/bukkar2"
keep if _merge==3
drop _merge


********************** 			construct/clean variables

*** work participation
* current
gen work1993 = 0
replace work1993 = 1 if tk01==1
replace work1993 = 1 if tk02==1
replace work1993 = 1 if tk03==1
replace work1993 = 1 if tk04==1
* replace past employment as not work if never work before
forval j=1988/1992 {
	replace work`j' = 0 if work`j'!=1 & tk05==3
}
* replace past employment as not work if worked before 87
forval j=1988/1992 {
	replace work`j' = 0 if work`j'!=1 & tk05==1 & tk06==3
}

*** work status
ren tk24a workstat1993
*recode workstat1993 (7 8 9 = .)

*** sidejob
ren tk27 sidejob1993
recode sidejob1993 (3=0) //(8 = .)

*** product
ren tk19a product1993

*** duty
ren tk20a duty1993

*** occupation
ren occ20a occ1993

*** main activity
gen working_om1993 = tk01==1 if tk01<95
gen searching_om1993 = tk01==2 if tk01<95
gen schooling_om1993 = tk01==3 if tk01<95
gen housekeeping_om1993 = tk01==4 if tk01<95

*** salary
gen wage_m1993 = tk25r1_m
replace wage_m1993 = . if tk25r1_m>=99995
gen wage_y1993 = tk25r1_y
replace wage_y1993 = . if tk25r1_y>=999995

*** profit
gen profit_m1993 = tk26r1_m
replace profit_m1993 = . if tk26r1_m>=99995
gen profit_y1993 = tk26r1_y
replace profit_y1993 = . if tk26r1_y>=99995

*** income
egen income_m1993 = rowtotal(wage_m1993 profit_m1993)
replace income_m1993 = . if wage_m1993==. & profit_m1993==.
egen income_y1993 = rowtotal(wage_y1993 profit_y1993)
replace income_y1993 = . if wage_y1993==. & profit_y1993==.

*** work hour
gen workhour1993 = tk22a
replace workhour1993 = . if tk22a>=95

********************** 			restructure data

*** keep necessary variables
keep pidlink work* sidejob* occ* ar07 ar08yr ar08mth ar09yr ar09mth hhid93 pid93 ///
	working_om* searching_om* schooling_om* housekeeping_om* ///
	wage_m* wage_y* profit_m* profit_y* income_m* income_y* workhour*
drop occ12 occ20b

********************** 			create variables consistent across waves

forval j=1988/1993 {

**** work
	* 1) yes/no otherwise missing
	gen work_om`j' = work`j'
	replace work_om`j' = . if work`j'>1

	* 2) yes otherwise no
	gen work_on`j' = work`j'==1

**** sidejob
	* step 1: no side job if not working
		replace sidejob`j' = 0 if work`j'==0

	* step 2:
		* 1) yes/no otherwise missing
		gen sidejob_om`j' = sidejob`j'
		replace sidejob_om`j' = . if sidejob`j'>1
		/* Note:
			- possible for sidejob to be defined and main work missing
			  since we code DK to missing
		*/

		* 2) yes otherwise no
		gen sidejob_on`j' = sidejob`j'==1


**** occupation
	* step 1: aggregate to first digit
		gen occ1_`j' = substr(occ`j',1,1)

	* step 2: create dummies for each heading
		* 1) yes/no otherwise missing (unconditional on working)
		gen professional_om`j' = occ1_`j'=="0" | occ1_`j'=="1" if work_om`j'!=.
		gen manager_om`j' = occ1_`j'=="2" if work_om`j'!=.
		gen clerk_om`j' = occ1_`j'=="3" if work_om`j'!=.
		gen sales_om`j' = occ1_`j'=="4" if work_om`j'!=.
		gen service_om`j' = occ1_`j'=="5" if work_om`j'!=.
		gen agricultural_om`j' = occ1_`j'=="6" if work_om`j'!=.
		gen production_om`j' = occ1_`j'=="7" if work_om`j'!=.

		* 2) yes otherwise no (unconditional on working)
		gen professional_on`j' = occ1_`j'=="0" | occ1_`j'=="1"
		gen manager_on`j' = occ1_`j'=="2"
		gen clerk_on`j' = occ1_`j'=="3"
		gen sales_on`j' = occ1_`j'=="4"
		gen service_on`j' = occ1_`j'=="5"
		gen agricultural_on`j' = occ1_`j'=="6"
		gen production_on`j' = occ1_`j'=="7"

		* 3) conditional on working
		gen professional_c`j' = occ1_`j'=="0" | occ1_`j'=="1" if work_on`j'==1
		gen manager_c`j' = occ1_`j'=="2" if work_on`j'==1
		gen clerk_c`j' = occ1_`j'=="3" if work_on`j'==1
		gen sales_c`j' = occ1_`j'=="4" if work_on`j'==1
		gen service_c`j' = occ1_`j'=="5" if work_on`j'==1
		gen agricultural_c`j' = occ1_`j'=="6" if work_on`j'==1
		gen production_c`j' = occ1_`j'=="7" if work_on`j'==1

**** work status
	* 1) yes/no otherwise missing (unconditional on working)
	gen selfemp_om`j' = inrange(workstat`j',1,3) if work_om`j'!=.
	gen govwork_om`j' = workstat`j'==4 if work_om`j'!=.
	gen pvtwork_om`j' = workstat`j'==5 if work_om`j'!=.
	gen famwork_om`j' = workstat`j'==6 if work_om`j'!=.

	* 2) yes otherwise no (unconditional on working)
	gen selfemp_on`j' = inrange(workstat`j',1,3)
	gen govwork_on`j' = workstat`j'==4
	gen pvtwork_on`j' = workstat`j'==5
	gen famwork_on`j' = workstat`j'==6

	* 3) conditional on working
	gen selfemp_c`j' = inrange(workstat`j',1,3) if work_on`j'==1
	gen govwork_c`j' = workstat`j'==4 if work_on`j'==1
	gen pvtwork_c`j' = workstat`j'==5 if work_on`j'==1
	gen famwork_c`j' = workstat`j'==6 if work_on`j'==1
}

********************** 			merge with education and religion

merge 1:1 hhid93 pid93 pidlink using "$ifls_clean1/Education-All.dta", keep(1 3) keepusing(yos muslim)
drop _merge

********************** 			merge with expenditure capita

merge m:1 hhid93 using "$raw/IFLS/pce_allwaves/pce93nom.dta", keep(1 3) keepusing(xnonfoodtot xfoodtot pce hhsize93)
rename (xnonfoodtot xfoodtot) (xnonfood xfood)
gen pce_food1993=xfood/hhsize93
gen pce_nonfood1993=xnonfood/hhsize93
gen pce_total1993=pce
gen exp_food1993=xfood
gen exp_nonfood1993=xnonfood
gen exp_total1993=exp_food+exp_nonfood
ren hhsize hhsize1993
drop _merge xnonfood xfood pce

save "$ifls_clean1/revised work history",replace

***************************************************************************
*** PART 4.2: IFLS2
***************************************************************************

use "$ifls_raw2/b3a_tk3",clear

*** keep necessary variables
keep pidlink tk28yr tk28 tk32occ tk32ind tk33 tk38 tk34 tk35n tk36

*** rename variables
ren tk28 work
ren tk33 workstat
ren tk38 sidejob
ren tk32ind ind
ren tk32occ occ
ren tk28yr year

*** recode work
recode work (3=0) //(6 7 8 9 = .)
recode sidejob (3=0) //(6 7 8 9 = .)
*recode workstat (7 8 9 = .)

*** define label values
la def binary 1 "Yes" 0 "No" 8 "Don't know"
la def workstat 1 "Self employed" ///
  4 "Government worker" ///
  5 "Private worker" 6 "Unpaid family worker" 98 "Don't know"

*** assign value labels
foreach v in work sidejob {
	la val `v' binary
}
la val workstat workstat

*** salary
gen wage_m = tk34/1000
drop tk34

*** profit
gen profit_m = tk35n/1000
drop tk35n

*** income
egen income_m = rowtotal(wage_m profit_m)
replace income_m = . if wage_m==. & profit_m==.

*** work hours
ren tk36 workhour
replace workhour = . if workhour>=95

* reshape to wide
reshape wide work ind occ workstat sidejob wage_m profit_m income_m workhour, i(pidlink) j(year)

********************** 			merge with other modules

** merge with TK1
merge 1:1 pidlink using "$ifls_raw2/b3a_tk1"
drop _merge

** merge with TK2
merge 1:1 pidlink using "$ifls_raw2/b3a_tk2"
drop _merge


********************** 			construct/clean variables

*** work participation
* current
gen work1997 = 0
replace work1997 = 1 if tk01==1
replace work1997 = 1 if tk02==1
replace work1997 = 1 if tk03==1
replace work1997 = 1 if tk04==1
** replace past employment as not work if never work before
forval j=1988/1996 {
	replace work`j' = 0 if work`j'!=1 & tk05==3
}
** replace past employment as not work if worked before 87
forval j=1988/1996 {
	replace work`j' = 0 if work`j'!=1 & tk05==1 & tk06a==3
}
** replace past employment as not work if worked before 92
forval j=1992/1996 {
	replace work`j' = 0 if work`j'!=1 & tk05==1 & tk06b==3
}

*** work status
ren tk24a workstat1997
*recode workstat1997 (7 8 9 = .)

*** sidejob
ren tk27 sidejob1997
*recode sidejob199& (3=0) (7 8 9 = .)

*** occupation
ren tk20aocc occ1997

*** industry
ren tk20aind ind1997

*** main activity
gen working_om1997 = tk01==1 if tk01<95
gen searching_om1997 = tk01==2 if tk01<95
gen schooling_om1997 = tk01==3 if tk01<95
gen housekeeping_om1997 = tk01==4 if tk01<95

*** salary
gen wage_m1997 = tk25am/1000
gen wage_y1997 = tk25ay/1000

*** profit
gen profit_m1997 = tk26amn/1000
gen profit_y1997 = tk26ayn/1000

*** income
egen income_m1997 = rowtotal(wage_m1997 profit_m1997)
replace income_m1997 = . if wage_m1997==. & profit_m1997==.
egen income_y1997 = rowtotal(wage_y1997 profit_y1997)
replace income_y1997 = . if wage_y==. & profit_y==.

*** workhour
ren tk22a workhour1997
replace workhour1997 = . if workhour1997>=95

********************** 			restructure data

*** keep necessary variables
keep pidlink work* sidejob* ind* occ* hhid97 pid97 ///
	working_om* searching_om* schooling_om* housekeeping_om* ///
	wage_m* wage_y* profit_m* profit_y* income_m* income_y* workhour*

********************** 			create variables consistent across waves

forval j=1988/1997 {

**** work
	* 1) yes/no otherwise missing
	gen work_om`j' = work`j'
	replace work_om`j' = . if work`j'>1

	* 2) yes otherwise no
	gen work_on`j' = work`j'==1

**** sidejob
	* step 1: no side job if not working
		replace sidejob`j' = 0 if work`j'==0

	* step 2:
		* 1) yes/no otherwise missing
		gen sidejob_om`j' = sidejob`j'
		replace sidejob_om`j' = . if sidejob`j'>1
		/* Note:
			- possible for sidejob to be defined and main work missing
			  since we code DK to missing
		*/

		* 2) yes otherwise no
		gen sidejob_on`j' = sidejob`j'==1


**** occupation
	* step 1: aggregate to first digit
		gen occ1_`j' = substr(occ`j',1,1)

	* step 2: create dummies for each heading
		* 1) yes/no otherwise missing (unconditional on working)
		gen professional_om`j' = occ1_`j'=="0" | occ1_`j'=="1" if work_om`j'!=.
		gen manager_om`j' = occ1_`j'=="2" if work_om`j'!=.
		gen clerk_om`j' = occ1_`j'=="3" if work_om`j'!=.
		gen sales_om`j' = occ1_`j'=="4" if work_om`j'!=.
		gen service_om`j' = occ1_`j'=="5" if work_om`j'!=.
		gen agricultural_om`j' = occ1_`j'=="6" if work_om`j'!=.
		gen production_om`j' = occ1_`j'=="7" if work_om`j'!=.

		* 2) yes otherwise no (unconditional on working)
		gen professional_on`j' = occ1_`j'=="0" | occ1_`j'=="1"
		gen manager_on`j' = occ1_`j'=="2"
		gen clerk_on`j' = occ1_`j'=="3"
		gen sales_on`j' = occ1_`j'=="4"
		gen service_on`j' = occ1_`j'=="5"
		gen agricultural_on`j' = occ1_`j'=="6"
		gen production_on`j' = occ1_`j'=="7"

		* 3) conditional on working
		gen professional_c`j' = occ1_`j'=="0" | occ1_`j'=="1" if work_on`j'==1
		gen manager_c`j' = occ1_`j'=="2" if work_on`j'==1
		gen clerk_c`j' = occ1_`j'=="3" if work_on`j'==1
		gen sales_c`j' = occ1_`j'=="4" if work_on`j'==1
		gen service_c`j' = occ1_`j'=="5" if work_on`j'==1
		gen agricultural_c`j' = occ1_`j'=="6" if work_on`j'==1
		gen production_c`j' = occ1_`j'=="7" if work_on`j'==1

**** work status
	* 1) yes/no otherwise missing (unconditional on working)
	gen selfemp_om`j' = inrange(workstat`j',1,3) if work_om`j'!=.
	gen govwork_om`j' = workstat`j'==4 if work_om`j'!=.
	gen pvtwork_om`j' = workstat`j'==5 if work_om`j'!=.
	gen famwork_om`j' = workstat`j'==6 if work_om`j'!=.

	* 2) yes otherwise no (unconditional on working)
	gen selfemp_on`j' = inrange(workstat`j',1,3)
	gen govwork_on`j' = workstat`j'==4
	gen pvtwork_on`j' = workstat`j'==5
	gen famwork_on`j' = workstat`j'==6

	* 3) conditional on working
	gen selfemp_c`j' = inrange(workstat`j',1,3) if work_on`j'==1
	gen govwork_c`j' = workstat`j'==4 if work_on`j'==1
	gen pvtwork_c`j' = workstat`j'==5 if work_on`j'==1
	gen famwork_c`j' = workstat`j'==6 if work_on`j'==1

**** industry
	* 1) yes/no otherwise missing (unconditional on working)
	gen agriculture_om`j' = ind`j'==1 if work_om`j'!=.
	gen mining_om`j' = ind`j'==2 if work_om`j'!=.
	gen manufacturing_om`j' = ind`j'==3 if work_om`j'!=.
	gen utility_om`j' = ind`j'==4 if work_om`j'!=.
	gen construction_om`j' = ind`j'==5 if work_om`j'!=.
	gen trade_om`j' = ind`j'==6 if work_om`j'!=.
	gen logistic_om`j' = ind`j'==7 if work_om`j'!=.
	gen finance_om`j' = ind`j'==8 if work_om`j'!=.
	gen social_om`j' = ind`j'==9 if work_om`j'!=.

	* 2) yes otherwise no (unconditional on working)
	gen agriculture_on`j' = ind`j'==1
	gen mining_on`j' = ind`j'==2
	gen manufacturing_on`j' = ind`j'==3
	gen utility_on`j' = ind`j'==4
	gen construction_on`j' = ind`j'==5
	gen trade_on`j' = ind`j'==6
	gen logistic_on`j' = ind`j'==7
	gen finance_on`j' = ind`j'==8
	gen social_on`j' = ind`j'==9

	* 3) conditional on working
	gen agriculture_c`j' = ind`j'==1 if work_on`j'==1
	gen mining_c`j' = ind`j'==2 if work_on`j'==1
	gen manufacturing_c`j' = ind`j'==3 if work_on`j'==1
	gen utility_c`j' = ind`j'==4 if work_on`j'==1
	gen construction_c`j' = ind`j'==5 if work_on`j'==1
	gen trade_c`j' = ind`j'==6 if work_on`j'==1
	gen logistic_c`j' = ind`j'==7 if work_on`j'==1
	gen finance_c`j' = ind`j'==8 if work_on`j'==1
	gen social_c`j' = ind`j'==9 if work_on`j'==1

	* turn ind to string
	tostring(ind`j'),replace force
}

********************** 			merge with education

merge 1:1 hhid97 pid97 pidlink using "$ifls_clean2/Education-All.dta", keep(1 3) keepusing(yos muslim)
drop _merge

********************** 			merge with expenditure capita

merge m:1 hhid97 using "$raw/IFLS/pce_allwaves/pce97nom.dta", keep(1 3) keepusing(xnonfood xfood pce hhsize)
gen pce_food1997=xfood/hhsize
gen pce_nonfood1997=xnonfood/hhsize
gen pce_total1997=pce
gen exp_food1997=xfood
gen exp_nonfood1997=xnonfood
gen exp_total1997=exp_food+exp_nonfood
rename hhsize hhsize1997
drop _merge xnonfood xfood pce


save "$ifls_clean2/revised work history",replace

***************************************************************************
*** PART 4.3: IFLS3
***************************************************************************

use "$ifls_raw3/b3a_tk3",clear

*** keep necessary variables
keep pidlink tk28yr tk28 tk31aa0 tk32b tk33 tk38 tk34 tk35n tk36

*** rename variables
ren tk28 work
ren tk33 workstat
ren tk38 sidejob
ren tk31aa0 ind
ren tk32b occ
ren tk28yr year

*** recode work
recode work (3=0) //(6 7 8 9 = .)
recode sidejob (3=0) //(6 7 8 9 = .)
*recode workstat (7 8 9 = .)

*** define label values
la def binary 1 "Yes" 0 "No" 8 "Don't know"
la def workstat 1 "Self employed w/o help" ///
  2 "Self employed w/ temporary worker" ///
  3 "Self employed w/ permanent worker" 4 "Government worker" ///
  5 "Private worker" 6 "Unpaid family worker" 98 "Don't know"

*** assign value labels
foreach v in work sidejob {
	la val `v' binary
}
la val workstat workstat

*** salary
gen wage_m = tk34/1000
drop tk34

*** profit
gen profit_m = tk35n/1000
drop tk35n

*** income
egen income_m = rowtotal(wage_m profit_m)
replace income_m = . if wage_m==. & profit_m==.

*** work hours
ren tk36 workhour
replace workhour = . if workhour>=95

* reshape to wide
reshape wide work ind occ workstat sidejob wage_m profit_m income_m workhour, i(pidlink) j(year)

********************** 			merge with other modules

** merge with TK1
merge 1:1 pidlink using "$ifls_raw3/b3a_tk1"
drop _merge

** merge with TK2
merge 1:1 pidlink using "$ifls_raw3/b3a_tk2"
drop _merge


********************** 			construct/clean variables

*** work participation
* current
gen work2000 = 0
replace work2000 = 1 if tk01==1
replace work2000 = 1 if tk02==1
replace work2000 = 1 if tk03==1
replace work2000 = 1 if tk04==1
** replace past employment as not work if never work before
forval j=1996/1999 {
	replace work`j' = 0 if work`j'!=1 & tk05==3
}
** replace past employment as not work if worked before 96
forval j=1996/1999 {
	replace work`j' = 0 if work`j'!=1 & tk05==1 & tk06a==3
}

*** work status
ren tk24a workstat2000
*recode workstat2000 (7 8 9 = .)

*** sidejob
ren tk27 sidejob2000
*recode sidejob2000 (3=0) (7 8 9 = .)

*** occupation
ren tk20ab occ2000

*** industry
ren tk19aa ind2000

*** main activity
gen working_om2000 = tk01==1 if tk01<95
gen searching_om2000 = tk01==2 if tk01<95
gen schooling_om2000 = tk01==3 if tk01<95
gen housekeeping_om2000 = tk01==4 if tk01<95

*** salary
gen wage_m2000 = tk25a1/1000
gen wage_y2000 = tk25a2/1000

*** profit
gen profit_m2000 = tk26amn/1000
gen profit_y2000 = tk26ayn/1000

*** income
egen income_m2000 = rowtotal(wage_m2000 profit_m2000)
replace income_m2000 = . if wage_m2000==. & profit_m2000==.
egen income_y2000 = rowtotal(wage_y2000 profit_y2000)
replace income_y2000 = . if wage_y2000==. & profit_y2000==.

*** workhour
ren tk22a workhour2000
replace workhour2000 = . if workhour2000>=95


********************** 			restructure data

*** keep necessary variables
keep pidlink work* sidejob* ind* occ* hhid00 pid00 ///
	working_om* searching_om* schooling_om* housekeeping_om* ///
	wage_m* wage_y* profit_m* profit_y* income_m* income_y* workhour*

********************** 			create variables consistent across waves

* Suffix convention used for the occupation, work status and industry dummies:
*	_om	1/0 where work_om is non-missing, missing otherwise
*	_on	1/0 for every observation (no if-qualifier)
*	_c	1/0 where work_on==1, missing otherwise
forval yr=1996/2000 {

	* if-qualifier that goes with each suffix
	local if_om "if work_om`yr'!=."
	local if_on ""
	local if_c "if work_on`yr'==1"

**** work
	* _om: keep values up to 1, everything above 1 (incl. missing) -> missing
	gen work_om`yr' = work`yr' if work`yr'<=1

	* _on: 1 if coded 1, otherwise 0
	gen work_on`yr' = (work`yr'==1)

**** sidejob
	* no side job if not working
	replace sidejob`yr' = 0 if work`yr'==0

	* _om: keep values up to 1, everything above 1 (incl. missing) -> missing
	* (sidejob can be defined while work_om is missing, because values of
	*  work above 1 are set to missing there)
	gen sidejob_om`yr' = sidejob`yr' if sidejob`yr'<=1

	* _on: 1 if coded 1, otherwise 0
	gen sidejob_on`yr' = (sidejob`yr'==1)

**** occupation
	* heading = first character of the occupation code
	gen occ1_`yr' = substr(occ`yr',1,1)

	* headings "0" and "1" are pooled into professional; "2"-"7" map one-to-one
	foreach sfx in om on c {
		gen professional_`sfx'`yr' = inlist(occ1_`yr',"0","1") `if_`sfx''
		local code 2
		foreach grp in manager clerk sales service agricultural production {
			gen `grp'_`sfx'`yr' = occ1_`yr'=="`code'" `if_`sfx''
			local ++code
		}
	}

**** work status
	* codes 1-3 self employed, 4 government, 5 private, 6 family worker
	foreach sfx in om on c {
		gen selfemp_`sfx'`yr' = inrange(workstat`yr',1,3) `if_`sfx''
		gen govwork_`sfx'`yr' = workstat`yr'==4 `if_`sfx''
		gen pvtwork_`sfx'`yr' = workstat`yr'==5 `if_`sfx''
		gen famwork_`sfx'`yr' = workstat`yr'==6 `if_`sfx''
	}

**** industry
	* industry is a string here; codes "1"-"9" map one-to-one onto the sectors
	foreach sfx in om on c {
		local code 1
		foreach sec in agriculture mining manufacturing utility construction trade logistic finance social {
			gen `sec'_`sfx'`yr' = ind`yr'=="`code'" `if_`sfx''
			local ++code
		}
	}
}

********************** 			merge with education

* keep every master row (matched or not); bring in only yos and muslim
merge 1:1 hhid00 pid00 pidlink using "$ifls_clean3/Education-All.dta", keep(1 3) keepusing(yos muslim)
drop _merge

********************** 			merge with expenditure capita

* household-level file, so many persons map to one household record
merge m:1 hhid00 using "$raw/IFLS/pce_allwaves/pce00nom.dta", keep(1 3) keepusing(xnonfood xfood pce hhsize)
* per-capita figures: household totals divided by household size
gen pce_food2000=xfood/hhsize
gen pce_nonfood2000=xnonfood/hhsize
gen pce_total2000=pce
* household-level totals
gen exp_food2000=xfood
gen exp_nonfood2000=xnonfood
* full variable names are spelled out so this line does not depend on
* variable abbreviation (it would fail under -set varabbrev off-)
gen exp_total2000=exp_food2000+exp_nonfood2000
ren hhsize hhsize2000
drop _merge xnonfood xfood pce


save "$ifls_clean3/revised work history",replace

***************************************************************************
***PART 4: IFLS4
***************************************************************************

use "$ifls_raw4/b3a_tk3",clear

*** restrict to the work-history items used below
keep pidlink tk28year tk28 tk31a occ2007 tk33 tk33a

*** give the items short stub names so that the reshape can append the year
ren tk28year year
ren tk28 work
ren tk31a ind
ren occ2007 occ
ren tk33 workstat
ren tk33a sidejob

*** recode "no" from 3 to 0; all other codes are left as they are
recode work sidejob (3=0)
*recode workstat (7 8 9 = .)

*** value labels
label define binary 0 "No" 1 "Yes" 8 "Don't know"
label define workstat ///
  1 "Self employed w/o help" ///
  2 "Self employed w/ temporary worker" ///
  3 "Self employed w/ permanent worker" ///
  4 "Government worker" ///
  5 "Private worker" ///
  6 "Unpaid family worker" ///
  7 "Casual worker in non-agriculture" ///
  8 "Casual worker in agriculture" ///
  98 "Don't know"
label values work sidejob binary
label values workstat workstat

*** one row per person, one column per item and year
reshape wide work ind occ workstat sidejob, i(pidlink) j(year)

********************** 			merge with other modules

** TK1 and TK2, matched on pidlink; unmatched rows from either side are kept
merge 1:1 pidlink using "$ifls_raw4/b3a_tk1", nogenerate
merge 1:1 pidlink using "$ifls_raw4/b3a_tk2", nogenerate

********************** 			construct/clean variables

*** work participation
*** define current employment
* work2007==6 is used below as the marker for "interviewed in 2007" (see
* ivw07); for everyone else the current-job questions are assigned to 2008
gen work2008 = 0 if work2007!=6
order work2008, after(work2007)
* if interview conducted in 2008 & work for pay
foreach q in tk01a tk01 tk02 tk03 tk04 {
	replace work2008 = 1 if `q'==1 & work2007!=6
}
* if interview conducted in 2007 & work for pay
* (ivw07 has to be created before work2007==6 is overwritten)
gen ivw07 = work2007==6
order ivw07, after(work2008)
foreach q in tk01a tk01 tk02 tk03 tk04 {
	replace work2007 = 1 if `q'==1 & work2007==6
}
* if interview conducted in 2007 & not work for pay
replace work2007 = 0 if tk04==3 & work2007==6

*** if interview conducted in 2008: fill history years 1999-2007
forval j=1999/2007 {
	** replace past employment as not work if never work before
	replace work`j' = 0 if work`j'==. & tk05==3 & ivw07==0
	** replace past employment as not work if worked before 99
	replace work`j' = 0 if work`j'==. & tk05==1 & tk06a==3 & ivw07==0
}
*** if interview conducted in 2007: fill history years 1999-2006 only
*** (2007 is the current year for these respondents)
forval j=1999/2006 {
	** replace past employment as not work if never work before
	replace work`j' = 0 if work`j'==. & tk05==3 & ivw07==1
	** replace past employment as not work if worked before 99
	replace work`j' = 0 if work`j'==. & tk05==1 & tk06a==3 & ivw07==1
}


* In everything below, current-job information goes to the 2008 variable when
* work2008 is non-missing, and to the 2007 variable when work2008 is missing
* and work2007 is non-missing.

*** work status
* if interview conducted in 2008
gen workstat2008 = tk24a if work2008!=.
* if interview conducted in 2007
replace workstat2007 = tk24a if work2008==. & work2007!=.

*** sidejob
* if interview conducted in 2008
gen sidejob2008 = tk27 if work2008!=.
* if interview conducted in 2007
replace sidejob2007 = tk27 if work2008==. & work2007!=.
* recode "no" from 3 to 0
forval j=7/8 {
	recode sidejob200`j' (3=0)
}
* value labels


*** occupation
* if interview conducted in 2008
gen occ2008 = occ07tk2 if work2008!=.
* if interview conducted in 2007
replace occ2007 = occ07tk2 if work2008==. & work2007!=.
/* Note: occ07tk3 is for sidejob; we don't use it because n/a in previous waves */
// Need to convert this to 2-digit codes like in previous waves

*** industry
* if interview conducted in 2008
gen ind2008 = tk19ab if work2008!=.
* if interview conducted in 2007
replace ind2007 = tk19ab if work2008==. & work2007!=.
/* Note: tk19ba is for sidejob; we don't use it because n/a in previous waves */

*** main activity
* one dummy per tk01 answer 1-4; tk01 codes of 95 and above are excluded.
* All four 2008 dummies carry the work2008!=. restriction, so that a
* respondent interviewed in 2007 gets the 2007 dummies only and not a copy of
* the same answer under 2008 as well.
local code 1
foreach act in working searching schooling housekeeping {
	gen `act'_om2008 = tk01==`code' if tk01<95 & work2008!=.
	gen `act'_om2007 = tk01==`code' if tk01<95 & work2008==. & work2007!=.
	local ++code
}

*** salary (raw amount divided by 1000)
gen wage_m2008 = tk25a1/1000 if work2008!=.
gen wage_m2007 = tk25a1/1000 if work2008==. & work2007!=.
gen wage_y2008 = tk25a2/1000 if work2008!=.
gen wage_y2007 = tk25a2/1000 if work2008==. & work2007!=.

*** profit (raw amount divided by 1000)
gen profit_m2008 = tk26a1/1000 if work2008!=.
gen profit_m2007 = tk26a1/1000 if work2008==. & work2007!=.
gen profit_y2008 = tk26a3/1000 if work2008!=.
gen profit_y2007 = tk26a3/1000 if work2008==. & work2007!=.

*** income = wage + profit
egen income_m2008 = rowtotal(wage_m2008 profit_m2008)
egen income_y2008 = rowtotal(wage_y2008 profit_y2008)
egen income_m2007 = rowtotal(wage_m2007 profit_m2007)
egen income_y2007 = rowtotal(wage_y2007 profit_y2007)
* rowtotal() treats a missing component as 0, so reset the sum to missing
* when both components are missing
foreach x in m2008 y2008 m2007 y2007 {
	replace income_`x' = . if wage_`x'==. & profit_`x'==.
}

*** workhour
* values of 95 and above are set to missing
replace tk22a = . if tk22a>=95
gen workhour2008 = tk22a if work2008!=.
gen workhour2007 = tk22a if work2008==. & work2007!=.


********************** 			restructure data

*** keep necessary variables
keep pidlink work* sidejob* ind* occ* hhid07 pid07 ivw07 ///
	working_om* searching_om* schooling_om* housekeeping_om* ///
	wage_m* wage_y* profit_m* profit_y* income_m* income_y* workhour*
* the raw occupation fields are matched by occ* above but are no longer needed
drop occ07tk*

********************** 			create variables consistent across waves

* Suffix convention used for the occupation, work status and industry dummies:
*	_om	1/0 where work_om is non-missing, missing otherwise
*	_on	1/0 for every observation (no if-qualifier)
*	_c	1/0 where work_on==1, missing otherwise
forval yr=1999/2008 {

	* if-qualifier that goes with each suffix
	local if_om "if work_om`yr'!=."
	local if_on ""
	local if_c "if work_on`yr'==1"

**** work
	* _om: keep values up to 1, everything above 1 (incl. missing) -> missing
	gen work_om`yr' = work`yr' if work`yr'<=1

	* _on: 1 if coded 1, otherwise 0
	gen work_on`yr' = (work`yr'==1)

**** sidejob
	* no side job if not working
	replace sidejob`yr' = 0 if work`yr'==0

	* _om: keep values up to 1, everything above 1 (incl. missing) -> missing
	* (sidejob can be defined while work_om is missing, because values of
	*  work above 1 are set to missing there)
	gen sidejob_om`yr' = sidejob`yr' if sidejob`yr'<=1

	* _on: 1 if coded 1, otherwise 0
	gen sidejob_on`yr' = (sidejob`yr'==1)

**** occupation
	* "999" is replaced by "ZZZ" first, so it cannot match any heading below
	replace occ`yr' = "ZZZ" if occ`yr'=="999"
	* heading = character in position 2 of the code; the stored occupation
	* code becomes the two characters starting at position 2
	gen occ1_`yr' = substr(occ`yr',2,1)
	gen occ2_`yr' = substr(occ`yr',2,2)
	drop occ`yr'
	ren occ2_`yr' occ`yr'

	* headings "0" and "1" are pooled into professional; "2"-"7" map one-to-one
	foreach sfx in om on c {
		gen professional_`sfx'`yr' = inlist(occ1_`yr',"0","1") `if_`sfx''
		local code 2
		foreach grp in manager clerk sales service agricultural production {
			gen `grp'_`sfx'`yr' = occ1_`yr'=="`code'" `if_`sfx''
			local ++code
		}
	}

**** work status
	* codes 1-3 self employed, 4 government, 6 family worker;
	* private worker pools code 5 with codes 7-8
	foreach sfx in om on c {
		gen selfemp_`sfx'`yr' = inrange(workstat`yr',1,3) `if_`sfx''
		gen govwork_`sfx'`yr' = workstat`yr'==4 `if_`sfx''
		gen pvtwork_`sfx'`yr' = (workstat`yr'==5 | inrange(workstat`yr',7,8)) `if_`sfx''
		gen famwork_`sfx'`yr' = workstat`yr'==6 `if_`sfx''
	}

**** industry
	* industry is a string here; codes "1"-"9" map one-to-one onto the sectors
	foreach sfx in om on c {
		local code 1
		foreach sec in agriculture mining manufacturing utility construction trade logistic finance social {
			gen `sec'_`sfx'`yr' = ind`yr'=="`code'" `if_`sfx''
			local ++code
		}
	}
}


********************** 			merge with education

* keep every master row (matched or not); bring in only yos and muslim
merge 1:1 hhid07 pid07 pidlink using "$ifls_clean4/Education-All.dta", keep(1 3) keepusing(yos muslim)
drop _merge

********************** 			merge with expenditure capita

* household-level file, so many persons map to one household record
merge m:1 hhid07 using "$raw/IFLS/pce_allwaves/pce07nom.dta", keep(1 3) keepusing(xnonfood xfood pce hhsize)
* the expenditure figures are assigned to 2007 when ivw07==1 and to 2008
* when ivw07==0
gen pce_food2007=xfood/hhsize if ivw07==1
gen pce_nonfood2007=xnonfood/hhsize if ivw07==1
gen pce_total2007=pce if ivw07==1
gen exp_food2007=xfood if ivw07==1
gen exp_nonfood2007=xnonfood if ivw07==1
gen exp_total2007=exp_food2007+exp_nonfood2007 if ivw07==1
gen pce_food2008=xfood/hhsize if ivw07==0
gen pce_nonfood2008=xnonfood/hhsize if ivw07==0
gen pce_total2008=pce if ivw07==0
gen exp_food2008=xfood if ivw07==0
gen exp_nonfood2008=xnonfood if ivw07==0
gen exp_total2008=exp_food2008+exp_nonfood2008 if ivw07==0
* ivw07 is written out in full so these lines do not depend on variable
* abbreviation
gen hhsize2007=hhsize if ivw07==1
gen hhsize2008=hhsize if ivw07==0
* 1 if the current-job information was assigned to 2008
gen ivw2008=ivw07==0
drop _merge xnonfood xfood pce hhsize ivw07


save "$ifls_clean4/revised work history",replace

***************************************************************************
***PART 5: IFLS5
***************************************************************************

use "$ifls_raw5/b3a_tk3",clear

*** keep necessary variables
keep pidlink tk28year tk28 tk31a occ2014 tk33 tk33a

*** rename variables
ren tk28 work
ren tk33 workstat
ren tk33a sidejob
ren tk31a ind
* the occupation stub carries a trailing underscore in this wave; the
* underscore is stripped again after the TK2 merge (ren occ_* occ*)
ren occ2014 occ_
ren tk28year year

*** recode work ("no" from 3 to 0; all other codes are left as they are)
recode work (3=0) //(6 7 8 9 = .)
recode sidejob (3=0) //(6 7 8 9 = .)
*recode workstat (7 8 9 = .)

*** define label values
la def binary 1 "Yes" 0 "No" 8 "Don't know"
la def workstat 1 "Self employed w/o help" ///
  2 "Self employed w/ temporary worker" ///
  3 "Self employed w/ permanent worker" 4 "Government worker" ///
  5 "Private worker" 6 "Unpaid family worker" ///
  7 "Casual worker in non-agriculture" 8 "Casual worker in agriculture" ///
  98 "Don't know"

*** assign value labels
foreach v in work sidejob {
	la val `v' binary
}
la val workstat workstat

* reshape to wide
* the occupation stub is given by its full name (occ_) rather than the
* abbreviation occ, so the command does not depend on variable abbreviation;
* the wide variables are occ_<year>
reshape wide work ind occ_ workstat sidejob, i(pidlink) j(year)

********************** 			merge with other modules

** merge with TK1
merge 1:1 pidlink using "$ifls_raw5/b3a_tk1"
drop _merge

** merge with TK2
*use "$ifls_raw5\b3a_tk1",clear
merge 1:1 pidlink using "$ifls_raw5/b3a_tk2"
drop _merge



********************** 			construct/clean variables

*** work participation
*** define current employment
* work2014==6 is used below as the marker for "interviewed in 2014" (see
* ivw14); for everyone else the current-job questions are assigned to 2015
gen work2015 = 0 if work2014!=6
order work2015, after(work2014)
* if interview conducted in 2015 & work for pay
foreach q in tk01a tk01 tk02 tk03 tk04 {
	replace work2015 = 1 if `q'==1 & work2014!=6
}
* if interview conducted in 2014 & work for pay
* (ivw14 has to be created before work2014==6 is overwritten)
gen ivw14 = work2014==6
order ivw14, after(work2015)
foreach q in tk01a tk01 tk02 tk03 tk04 {
	replace work2014 = 1 if `q'==1 & work2014==6
}
* if interview conducted in 2014 & not work for pay
replace work2014 = 0 if tk04==3 & work2014==6

*** if interview conducted in 2015: fill history years 2007-2014
forval j=2007/2014 {
	** replace past employment as not work if never work before
	replace work`j' = 0 if work`j'==. & tk05==3 & ivw14==0
	** replace past employment as not work if last worked before the
	** history window (tk05==1 & tk06a==3)
	replace work`j' = 0 if work`j'==. & tk05==1 & tk06a==3 & ivw14==0
}
*** if interview conducted in 2014: fill history years 2007-2013 only
*** (2014 is the current year for these respondents)
forval j=2007/2013 {
	** replace past employment as not work if never work before
	replace work`j' = 0 if work`j'==. & tk05==3 & ivw14==1
	** replace past employment as not work if last worked before the
	** history window (tk05==1 & tk06a==3)
	replace work`j' = 0 if work`j'==. & tk05==1 & tk06a==3 & ivw14==1
}

* In everything below, current-job information goes to the 2015 variable when
* work2015 is non-missing, and to the 2014 variable when work2015 is missing
* and work2014 is non-missing.

*** work status
* if interview conducted in 2015
gen workstat2015 = tk24a if work2015!=.
* if interview conducted in 2014
replace workstat2014 = tk24a if work2015==. & work2014!=.

*** sidejob
* if interview conducted in 2015
gen sidejob2015 = tk27 if work2015!=.
* if interview conducted in 2014
replace sidejob2014 = tk27 if work2015==. & work2014!=.
* recode "no" from 3 to 0
forval j=14/15 {
	recode sidejob20`j' (3=0)
}
* value labels


*** occupation
ren occ2014_primary occ14tk2
ren occ2014_secondary occ14tk3
* strip the underscore from the history variables (occ_<year> -> occ<year>)
ren occ_* occ*
* if interview conducted in 2015
gen occ2015 = occ14tk2 if work2015!=.
* if interview conducted in 2014
replace occ2014 = occ14tk2 if work2015==. & work2014!=.
/* Note: occ14tk3 is for sidejob; we don't use it because n/a in previous waves
	- no one changes occupation seemingly. can be data aberration or fact. likely aberration but assume fact.
	- people who don't work have an occ code. the above code will remove this phenomenon
*/
// Need to convert this to 2-digit codes like in previous waves


*** industry
* if interview conducted in 2015
gen ind2015 = tk19ab if work2015!=.
* if interview conducted in 2014
replace ind2014 = tk19ab if work2015==. & work2014!=.
/* Note: tk19ba is for sidejob; we don't use it because n/a in previous waves */

*** main activity
* one dummy per tk01 answer 1-4; tk01 codes of 95 and above are excluded.
* All four 2015 dummies carry the work2015!=. restriction, so that a
* respondent interviewed in 2014 gets the 2014 dummies only and not a copy of
* the same answer under 2015 as well.
local code 1
foreach act in working searching schooling housekeeping {
	gen `act'_om2015 = tk01==`code' if tk01<95 & work2015!=.
	gen `act'_om2014 = tk01==`code' if tk01<95 & work2015==. & work2014!=.
	local ++code
}

*** salary (raw amount divided by 1000)
gen wage_m2015 = tk25a1/1000 if work2015!=.
gen wage_m2014 = tk25a1/1000 if work2015==. & work2014!=.
gen wage_y2015 = tk25a2/1000 if work2015!=.
gen wage_y2014 = tk25a2/1000 if work2015==. & work2014!=.

*** profit (raw amount divided by 1000)
gen profit_m2015 = tk26a1/1000 if work2015!=.
gen profit_m2014 = tk26a1/1000 if work2015==. & work2014!=.
gen profit_y2015 = tk26a3/1000 if work2015!=.
gen profit_y2014 = tk26a3/1000 if work2015==. & work2014!=.

*** income = wage + profit
egen income_m2015 = rowtotal(wage_m2015 profit_m2015)
egen income_y2015 = rowtotal(wage_y2015 profit_y2015)
egen income_m2014 = rowtotal(wage_m2014 profit_m2014)
egen income_y2014 = rowtotal(wage_y2014 profit_y2014)
* rowtotal() treats a missing component as 0, so reset the sum to missing
* when both components are missing
foreach x in m2015 y2015 m2014 y2014 {
	replace income_`x' = . if wage_`x'==. & profit_`x'==.
}

*** workhour
* values of 95 and above are set to missing
replace tk22a = . if tk22a>=95
gen workhour2015 = tk22a if work2015!=.
gen workhour2014 = tk22a if work2015==. & work2014!=.


********************** 			restructure data

*** keep necessary variables
keep pidlink work* sidejob* ind* occ* hhid14 hhid14_9 pid14 ivw14 ///
	working_om* searching_om* schooling_om* housekeeping_om* ///
	wage_m* wage_y* profit_m* profit_y* income_m* income_y* workhour*
* the raw occupation fields are matched by occ* above but are no longer needed
drop occ14tk2 occ14tk3

********************** 			create variables consistent across waves

* Suffix convention used for the occupation, work status and industry dummies:
*	_om	1/0 where work_om is non-missing, missing otherwise
*	_on	1/0 for every observation (no if-qualifier)
*	_c	1/0 where work_on==1, missing otherwise
forval yr=2007/2015 {

	* if-qualifier that goes with each suffix
	local if_om "if work_om`yr'!=."
	local if_on ""
	local if_c "if work_on`yr'==1"

**** work
	* _om: keep values up to 1, everything above 1 (incl. missing) -> missing
	gen work_om`yr' = work`yr' if work`yr'<=1

	* _on: 1 if coded 1, otherwise 0
	gen work_on`yr' = (work`yr'==1)

**** sidejob
	* no side job if not working
	replace sidejob`yr' = 0 if work`yr'==0

	* _om: keep values up to 1, everything above 1 (incl. missing) -> missing
	* (sidejob can be defined while work_om is missing, because values of
	*  work above 1 are set to missing there)
	gen sidejob_om`yr' = sidejob`yr' if sidejob`yr'<=1

	* _on: 1 if coded 1, otherwise 0
	gen sidejob_on`yr' = (sidejob`yr'==1)

**** occupation
	* heading = first character of the occupation code
	gen occ1_`yr' = substr(occ`yr',1,1)

	* headings "0" and "1" are pooled into professional; "2"-"7" map one-to-one
	foreach sfx in om on c {
		gen professional_`sfx'`yr' = inlist(occ1_`yr',"0","1") `if_`sfx''
		local code 2
		foreach grp in manager clerk sales service agricultural production {
			gen `grp'_`sfx'`yr' = occ1_`yr'=="`code'" `if_`sfx''
			local ++code
		}
	}

**** work status
	* codes 1-3 self employed, 4 government, 6 family worker;
	* private worker pools code 5 with codes 7-8
	foreach sfx in om on c {
		gen selfemp_`sfx'`yr' = inrange(workstat`yr',1,3) `if_`sfx''
		gen govwork_`sfx'`yr' = workstat`yr'==4 `if_`sfx''
		gen pvtwork_`sfx'`yr' = (workstat`yr'==5 | inrange(workstat`yr',7,8)) `if_`sfx''
		gen famwork_`sfx'`yr' = workstat`yr'==6 `if_`sfx''
	}

**** industry
	* industry is compared as a number here; codes 1-9 map one-to-one onto
	* the sectors
	foreach sfx in om on c {
		local code 1
		foreach sec in agriculture mining manufacturing utility construction trade logistic finance social {
			gen `sec'_`sfx'`yr' = ind`yr'==`code' `if_`sfx''
			local ++code
		}
	}

	* once the dummies exist, convert industry to string
	tostring ind`yr', replace force
}

********************** 			merge with education
* keep every master row (matched or not); bring in only yos and muslim
merge 1:1 hhid14 pid14 pidlink using "$ifls_clean5/Education-All.dta", ///
	keep(1 3) keepusing(yos muslim) nogenerate

********************** 			merge with expenditure capita
* hhid14_9 is used as the household key for this merge; the original hhid14
* is parked in temphhid and restored afterwards
gen temphhid=hhid14
replace hhid14=hhid14_9
merge m:1 hhid14 using "$raw/IFLS/pce_allwaves/pce2014nom.dta", ///
	keep(1 3) keepusing(xnonfood xfood pce hhsize) nogenerate

* the expenditure figures are assigned to 2014 when ivw14==1 and to 2015
* when ivw14==0
foreach yr in 2014 2015 {
	local in14 = (`yr'==2014)
	gen pce_food`yr'=xfood/hhsize if ivw14==`in14'
	gen pce_nonfood`yr'=xnonfood/hhsize if ivw14==`in14'
	gen pce_total`yr'=pce if ivw14==`in14'
	gen exp_food`yr'=xfood if ivw14==`in14'
	gen exp_nonfood`yr'=xnonfood if ivw14==`in14'
	gen exp_total`yr'=exp_food`yr'+exp_nonfood`yr' if ivw14==`in14'
}
foreach yr in 2014 2015 {
	local in14 = (`yr'==2014)
	gen hhsize`yr'=hhsize if ivw14==`in14'
}
* 1 if the current-job information was assigned to 2015
gen ivw2015=ivw14==0
drop xnonfood xfood pce

* put the original household id back and remove the helpers
replace hhid14=temphhid
drop temphhid hhid14_9 hhsize



save "$ifls_clean5/revised work history",replace



***************************************************************************
*** PART 5: Get HH location
***************************************************************************

* For each wave: attach the household location variables to that wave's work
* history, keep only persons whose household is found in the location file,
* and overwrite the wave's work-history file.

* wave 1
use "$ifls_clean1/revised work history",clear
merge m:1 hhid93 using "$ifls_raw1/bukksc1", ///
	keepusing(sc01 sc02 sc03 sc05 sc07 commid93) keep(match) nogenerate
save "$ifls_clean1/revised work history",replace

* wave 2
use "$ifls_clean2/revised work history",clear
merge m:1 hhid97 using "$ifls_raw2/htrack", ///
	keepusing(sc02_93r sc03_93r sc01_97 sc02_97 sc03_97 sc05_97) keep(match) nogenerate
save "$ifls_clean2/revised work history",replace

* wave 3: tracking file first, then sc05 from the household book
use "$ifls_clean3/revised work history",clear
merge m:1 hhid00 using "$ifls_clean3/htrack", ///
	keepusing(sc030099 sc020099 sc010099 sc030000 sc020000 sc010000) keep(match) nogenerate
merge m:1 hhid00 using "$ifls_raw3/bk_sc", ///
	keepusing(sc05) keep(match) nogenerate
ren sc05 sc05_00
save "$ifls_clean3/revised work history",replace

* wave 4: tracking file first, then sc05 from the household book
use "$ifls_clean4/revised work history",clear
merge m:1 hhid07 using "$ifls_clean4/htrack", ///
	keepusing(sc010700 sc020700 sc030700 sc010707 sc020707 sc030707) keep(match) nogenerate
merge m:1 hhid07 using "$ifls_raw4/bk_sc", ///
	keepusing(sc05) keep(match) nogenerate
ren sc05 sc05_07
save "$ifls_clean4/revised work history",replace

* wave 5: tracking file first, then sc05 from the household book
use "$ifls_clean5/revised work history",clear
merge m:1 hhid14 using "$ifls_clean5/htrack", ///
	keepusing(sc01_14_14 sc02_14_14 sc03_14_14 sc01_14_07 sc02_14_07 sc03_14_07 sc01_14_00 sc02_14_00 sc03_14_00) keep(match) nogenerate
merge m:1 hhid14 using "$ifls_raw5/bk_sc1", ///
	keepusing(sc05) keep(match) nogenerate
ren sc05 sc05_14
save "$ifls_clean5/revised work history",replace



***************************************************************************
*** PART 6: Merge all work history from all waves
***************************************************************************

*** Earlier recall is assumed to be more accurate, so each later wave is
*** merged with -update-: it only fills values that are still missing in the
*** data accumulated so far and never overwrites them.

use "$ifls_clean1/revised work history", clear
forvalues w = 2/5 {
	merge 1:1 pidlink using "${ifls_clean`w'}/revised work history", update
	* keep the merge result of every wave: i2tkmerge ... i5tkmerge
	rename _merge i`w'tkmerge
}

order work* i2tkmerge i3tkmerge i4tkmerge i5tkmerge, after(pidlink)

*** count # interviews
* waves 1-2: master-only or using-only = 1 interview, matched (3-5) = 2
gen numivw = cond(inrange(i2tkmerge,3,5), 2, cond(inrange(i2tkmerge,1,2), 1, 0))
order numivw, after(i5tkmerge)
* waves 3-5: a person entering from the using file starts at 1, a matched
* person gains one more interview
forvalues w = 3/5 {
	replace numivw = 1 if i`w'tkmerge==2
	replace numivw = numivw+1 if inrange(i`w'tkmerge,3,5)
}

**** merge ptrack (only persons present in both files are retained)
merge 1:1 pidlink using "$ifls_clean5/ptrack", keep(match) nogenerate


**** harmonize districts to common boundary base: has to be 1993 (because need to merge with podes)

*** 2014 is handled separately because its location is coded on 2007 boundaries
gen kabcode_des07 = 100*sc01_14_07 + sc02_14_07
** Convert districts to 1993 boundaries
* NOTE(review): this merge keeps only _merge 1 and 3 although -update- is
* specified, while the loop below keeps 1 3 4 5 -- confirm the difference is
* intentional.
merge m:1 kabcode_des07 using "$clean/District codes/kab_codes_93_07", ///
	update keep(1 3) keepusing(kabcode_des93)
* list the codes that found no 1993 counterpart
tab kabcode_des07 if _merge==1, m
drop _merge
* free the generic names so they can be reused for the 2007 wave below
rename (kabcode_des93 kabcode_des07) (kab_1493 kabcode14_des07)

** Generate 4-digit kabupaten code for other waves
gen kabcode_des07 = 100*sc010707 + sc020707
gen kabcode_des99 = 100*sc010000 + sc020000
gen kabcode_des97 = 100*sc01_97  + sc02_97

** Convert districts to 1993 boundaries
foreach base in 07 99 97 {
	merge m:1 kabcode_des`base' ///
		using "$clean/District codes/kab_codes_93_`base'", ///
		update keep(1 3 4 5) keepusing(kabcode_des93)
	* list the codes that found no 1993 counterpart
	tab kabcode_des`base' if _merge==1, m
	drop _merge
	rename kabcode_des93 kab_`base'93
}

** districts in 1993
gen kab_9393 = 100*sc01 + sc02

*** assign highest level of education
/* Note: education could be assigned the same way as the current kab, but
		 that may be troublesome if many reports fluctuate from high to low
*/

* the most recent non-missing report wins: start from 2014 and fall back
* wave by wave to 1993
gen yos = yos14
foreach wv in 07 00 97 93 {
	replace yos = yos`wv' if missing(yos)
}
tab yos, m

* religion: same most-recent-report rule
gen muslim = muslim14
foreach wv in 07 00 97 93 {
	replace muslim = muslim`wv' if missing(muslim)
}

*** reshape to one row per person-year
* The stub list is assembled piece by piece, in the same order as it would be
* typed out by hand: each employment-status, occupation and industry group
* contributes an _om / _on / _c triple.
local rstubs work work_om work_on sidejob sidejob_om sidejob_on workstat
* employment status
foreach grp in selfemp govwork pvtwork famwork {
	local rstubs `rstubs' `grp'_om `grp'_on `grp'_c
}
* occupation
local rstubs `rstubs' occ occ1_
foreach grp in professional manager clerk sales service agricultural production {
	local rstubs `rstubs' `grp'_om `grp'_on `grp'_c
}
* industry
local rstubs `rstubs' ind
foreach grp in agriculture mining manufacturing utility construction trade ///
	logistic finance social {
	local rstubs `rstubs' `grp'_om `grp'_on `grp'_c
}
* main activity, earnings, hours, expenditure and interview indicator
local rstubs `rstubs' working_om searching_om schooling_om housekeeping_om
local rstubs `rstubs' wage_m wage_y profit_m profit_y income_m income_y workhour
local rstubs `rstubs' pce_total pce_food pce_nonfood exp_food exp_nonfood exp_total hhsize ivw

reshape long `rstubs', i(pidlink) j(year)
sort pidlink year

*** assign current kab and current urban residence
* Both variables follow the same rule.  For each block of recall years the
* location of the wave covering those years is used first; person-years still
* empty are then filled from the following wave.  A value is only assigned
* when work is non-missing and the target is still empty, so earlier rules
* always take precedence over later ones.
foreach out in kabcurrent urban {
	* location sources in wave order: IFLS1 IFLS2 IFLS3 IFLS4 IFLS5
	if "`out'" == "kabcurrent" local src kab_9393 kab_9793 kab_9993 kab_0793 kab_1493
	else                       local src sc05 sc05_97 sc05_00 sc05_07 sc05_14
	tokenize `src'
	local open "work!=. & `out'==."

	capture drop `out'
	gen `out' = .
	* ifls1 years
	replace `out' = `1' if inrange(year,1988,1993) & (i2tkmerge==1| inrange(i2tkmerge,3,5)) & `open'
	replace `out' = `2' if inrange(year,1988,1993) & (i2tkmerge==2) & `open'
	* ifls2 years
	replace `out' = `2' if inrange(year,1994,1997) & inrange(i2tkmerge,2,5) & `open'
	replace `out' = `3' if inrange(year,1994,1997) & inrange(i3tkmerge,2,5) & `open'
	* ifls3 years
	replace `out' = `3' if inrange(year,1998,2000) & inrange(i3tkmerge,2,5) & `open'
	replace `out' = `4' if inrange(year,1998,2000) & inrange(i4tkmerge,2,5) & `open'
	* ifls4 years
	replace `out' = `4' if inrange(year,2001,2008) & inrange(i4tkmerge,2,5) & `open'
	replace `out' = `5' if inrange(year,2001,2008) & inrange(i5tkmerge,2,5) & `open'
	* ifls5 years
	replace `out' = `5' if inrange(year,2009,2015) & inrange(i5tkmerge,2,5) & `open'
}

*** define age in each person-year
gen agecurrent = year - bth_year

*** aged 19-45 in each survey round
local rounds 1993 1997 2000 2007 2014
* flag is defined on the row of the survey year only (missing elsewhere)
foreach rd of local rounds {
	gen a1945_`rd' = inrange(agecurrent,19,45) if year==`rd'
}
** spread each round's flag over all rows of the same person
foreach rd of local rounds {
	egen age1945_`rd' = max(a1945_`rd'), by(pidlink)
}
** number of rounds (out of 5) in which the person was aged 19-45
egen age1945 = rowtotal(age1945_1993 age1945_1997 age1945_2000 age1945_2007 age1945_2014)

************* transform monetary values

*** merge with CPI (person-years without a CPI value are dropped)
merge m:1 year using "$raw/nationalcpi", keep(match) nogenerate

*** apply transformation
* prefixes: r = real (deflated by cpi, scaled by 100), l = log, i = inverse
* hyperbolic sine, lr / ir = log / ihs of the real value.  Zeros are set to
* log(0.1) in the log versions.  workhour only gets the nominal l and i
* versions; no real versions are kept for it.
foreach x in wage_m wage_y profit_m profit_y income_m income_y workhour ///
	pce_total pce_food pce_nonfood exp_food exp_nonfood exp_total {
	local deflate = ("`x'" != "workhour")
	** real
	if `deflate' gen r`x' = `x'/cpi*100
	** log nominal
	gen l`x' = cond(`x'==0, log(0.1), log(`x'))
	** ihs nominal
	gen i`x' = asinh(`x')
	if `deflate' {
		** log real
		gen lr`x' = cond(`x'==0, log(0.1), log(r`x'))
		** ihs real
		gen ir`x' = asinh(r`x')
	}
}

*** sample selection: sex code 3, at least 2 interviews, and aged 19-45 in
*** at least 2 survey rounds
keep if sex==3 & numivw>=2 & age1945>=2

*** keep necessary variables
* _om / _on / _c triples for employment status, occupation and industry
local keepvars
foreach grp in selfemp govwork pvtwork famwork ///
	professional manager clerk sales service agricultural production ///
	agriculture mining manufacturing utility construction trade ///
	logistic finance social {
	local keepvars `keepvars' `grp'_om `grp'_on `grp'_c
}
* earnings: nominal plus all five transformations
local money wage_m wage_y profit_m profit_y income_m income_y
foreach pre in r l i lr ir {
	foreach v of local money {
		local keepvars `keepvars' `pre'`v'
	}
}
* per-capita expenditure: real, log, ihs-real and log-real versions
foreach pre in r l ir lr {
	foreach v in pce_total pce_food pce_nonfood {
		local keepvars `keepvars' `pre'`v'
	}
}
* household expenditure: log-real and ihs-real versions
foreach pre in lr ir {
	foreach v in exp_total exp_nonfood exp_food {
		local keepvars `keepvars' `pre'`v'
	}
}

keep pidlink year kabcurrent agecurrent bth_year sex numivw age1945 ///
	hhid* urban yos* muslim* ///
	work work_om work_on sidejob sidejob_om sidejob_on workstat ///
	occ occ1_ ind ///
	working_om searching_om schooling_om housekeeping_om ///
	`money' workhour lworkhour iworkhour ///
	pce_total pce_food pce_nonfood ///
	exp_total exp_nonfood exp_food hhsize ivw ///
	`keepvars'

*** label variables
la var yos "Years of schooling"
la var kabcurrent "Current district of residence"
la var urban "Urban"

save "$clean/IFLS/Household/revised work history", replace


***************************************************************************
***PART 7: Clean pregnancy module
***************************************************************************

******************
*     WAVE 1     *
******************
* Reduce the wave-1 pregnancy file to the variables used later, give them
* the names shared by all waves, and blank out-of-range codes.

use "$ifls_raw1/buk4ch1", clear
keep hhid93 pid93 pidlink prgid ///
	ch06 ch08 ch09day ch09mth ch09yr ch10 ch11 ch25 ch27a ch27b ch28

* rename variables
rename (ch06   ch08  ch09day ch09mth ch09yr ch10    ch11       ch25    ch27a  ch27b ch28) ///
       (prgout chsex chbday  chbmo   chbyr  chbpage consistent chalive chinhh chpid chbrfed)

* clean missing values
la def binary 0 "No" 1 "Yes"
replace prgout = . if prgout>4
* female indicator from the sex code: 3 -> 1, 1 -> 0, anything else missing
gen chfem = cond(chsex==3, 1, cond(chsex==1, 0, .))
drop chsex
replace chbday  = . if chbday>31
replace chbmo   = . if chbmo>12
replace chbpage = . if chbpage>=95
replace chpid   = . if chpid>=95
* birth year is recorded with two digits in this wave: blank codes above 94,
* then convert to a four-digit year
replace chbyr = cond(chbyr>94, ., 1900+chbyr)
* yes/no items: codes above 3 become missing, 3 becomes 0
foreach yn in consistent chalive chinhh chbrfed chfem {
	replace `yn' = . if `yn'>3
	replace `yn' = 0 if `yn'==3
	la val `yn' binary
}
gen wave = 1
save "$ifls_clean1/b4_ch1", replace

******************
*   WAVES 2-5    *
******************
* Waves 2-5 share the same layout, so they are cleaned in one loop.  What
* differs by wave:
*   - the id suffix (hhid97/pid97, hhid00/pid00, hhid07/pid07, hhid14/pid14)
*   - the latest birth year accepted (1998, 2001, 2008, 2015)
*   - wave 4 is processed without ch27x1, so no chinhh variable is created
* ch11 (consistent) is brought in from the b4_ch0 file of the same wave.

forvalues w = 2/5 {
	local yy    : word `=`w'-1' of 97 00 07 14
	local maxyr : word `=`w'-1' of 1998 2001 2008 2015
	* child-in-household item: raw name and cleaned name (empty for wave 4)
	local inhh_raw ch27x1
	local inhh     chinhh
	if `w'==4 {
		local inhh_raw
		local inhh
	}

	use "${ifls_raw`w'}/b4_ch1", clear
	merge m:1 hhid`yy' pid`yy' pidlink using "${ifls_raw`w'}/b4_ch0", ///
		keep(1 3) keepusing(ch11)
	keep hhid`yy' pid`yy' pidlink ch05 ch06 ch08 ch09day ch09mth ch09yr ///
		ch10a ch11 ch25 ch27 `inhh_raw' ch24a

	* rename variables to the names shared by all waves
	rename (ch05  ch06   ch08  ch09day ch09mth ch09yr ch10a   ch11       ch25    ch27  ch24a) ///
	       (prgid prgout chsex chbday  chbmo   chbyr  chbpage consistent chalive chpid chbrfed)
	if "`inhh_raw'" != "" rename `inhh_raw' `inhh'

	* clean missing values
	la def binary 0 "No" 1 "Yes"
	replace prgout = . if prgout>4
	* female indicator from the sex code: 3 -> 1, 1 -> 0, anything else missing
	gen chfem = cond(chsex==3, 1, cond(chsex==1, 0, .))
	drop chsex
	replace chbday  = . if chbday>31
	replace chbmo   = . if chbmo>12
	replace chbyr   = . if chbyr>`maxyr'
	replace chbpage = . if chbpage>=95
	* yes/no items: codes above 3 become missing, 3 becomes 0
	foreach yn in consistent chalive `inhh' chbrfed chfem {
		replace `yn' = . if `yn'>3
		replace `yn' = 0 if `yn'==3
		la val `yn' binary
	}
	replace chpid = . if chpid>=95
	gen wave = `w'
	save "${ifls_clean`w'}/b4_ch1", replace
}



***************************************************************************
***PART 8: Clean marital history module
***************************************************************************
* For every wave, reduce the marriage-history file to one record per person
* and expose its year and age of marriage as firstwed_yr / firstwed_age.
*
* Waves 2-5 sort each person's records by descending marriage number
* (kwn / kwn_num) and keep the record that sorts first.  The first-row flag
* is built directly from the sorted data: -egen ..., tag()- only promises to
* flag ONE observation per group, not the first one in the current sort
* order, so using it after -gsort- can keep an arbitrary marriage record.
* As with tag(), records with a missing pidlink are never flagged.
* NOTE(review): wave 2 applies no missing-code cleaning and wave 3 does not
* clean kw10yr -- confirm the raw files need none.

************
*  Wave 1  *
************
use "$ifls_raw1/buk4kw2",clear
keep if marrnum==1
* year is stored with two digits: blank codes above 93, then add the century
replace kw05a = . if kw05a>93
replace kw05a = 1900+kw05a
replace kw06 = . if kw06>=95
ren kw05a firstwed_yr
ren kw06 firstwed_age
save "$ifls_clean1/b4_kw2",replace

************
*  Wave 2  *
************
use "$ifls_raw2/b4_kw2",clear
gsort pidlink -kwn
* flag the first record of each person in the sorted order
gen byte firstwed = pidlink != pidlink[_n-1] & !missing(pidlink)
keep if firstwed==1
ren kw10yr firstwed_yr
ren kw11 firstwed_age
save "$ifls_clean2/b4_kw2",replace

************
*  Wave 3  *
************
use "$ifls_raw3/b4_kw3",clear
gsort pidlink -kwn
* flag the first record of each person in the sorted order
gen byte firstwed = pidlink != pidlink[_n-1] & !missing(pidlink)
keep if firstwed==1
replace kw11 = . if kw11>=95
ren kw10yr firstwed_yr
ren kw11 firstwed_age
save "$ifls_clean3/b4_kw2",replace

************
*  Wave 4  *
************
use "$ifls_raw4/b4_kw2",clear
gsort pidlink -kwn
* flag the first record of each person in the sorted order
gen byte firstwed = pidlink != pidlink[_n-1] & !missing(pidlink)
keep if firstwed==1
replace kw10yr = . if kw10yr > 2008
replace kw11 = . if kw11>=95
ren kw10yr firstwed_yr
ren kw11 firstwed_age
save "$ifls_clean4/b4_kw2",replace

************
*  Wave 5  *
************
use "$ifls_raw5/b4_kw3",clear
gsort pidlink -kwn_num
* flag the first record of each person in the sorted order
gen byte firstwed = pidlink != pidlink[_n-1] & !missing(pidlink)
keep if firstwed==1
replace kw11 = . if kw11==98
replace kw10yr = . if kw10yr==9998
ren kw10yr firstwed_yr
ren kw11 firstwed_age
save "$ifls_clean5/b4_kw2",replace

***************************************************************************
***PART 9: Clean pregnancy history to account for livebirth pregnancies only
***************************************************************************
* Stack the cleaned pregnancy files of all five waves, keep live births of
* children not reported dead, and drop records in a later wave whose birth
* year is not after the latest birth year of the preceding wave (treated as
* repeats of births already reported).  Output is one row per retained birth.

use "$ifls_clean1/b4_ch1",clear
append using "$ifls_clean2/b4_ch1"
append using "$ifls_clean3/b4_ch1"
append using "$ifls_clean4/b4_ch1"
append using "$ifls_clean5/b4_ch1"
sort pidlink wave prgid

* define label
la def prgout 1 "Pregnant" 2 "Alive" 3 "Stillbirth" 4 "Miscarriage"
la val prgout prgout

sort pidlink wave prgid
order pidlink wave prgid prgout

/* Notes:
	- need to check across wave if repeated birth year/month/day
	- what to do with same year, month, but day +- 3?
	- missing children in between other kids in next wave
	- next pregnancy list earlier year
	- dead children?
*/

** keep livebirth only
keep if prgout==2

** keep living child only
* (rows with chalive missing are retained; only explicit 0 is dropped)
drop if chalive==0

* bring in the mother's birth year; using-only rows (no pregnancy) are dropped
merge m:1 pidlink using "$ifls_clean5/ptrack", keepusing(bth_year)
drop if _merge==2

** infer year child was born given age when child was born
replace chbyr = bth_year + chbpage if chbyr==.

* NOTE(review): -unique- is not a built-in Stata command; presumably the
* community-contributed package that counts distinct values -- confirm it is
* installed before running.  wavenum_ spreads the count over all rows.
unique wave, by(pidlink) gen(wavenum)
bys pidlink: egen wavenum_ = max(wavenum)

**** mass cleaning method 1: drop all pregnancies year earlier than the last
**** livebirth in previous wave
* the sort order set here is relied on by the [_n-1] lookup below
sort pidlink wave prgid
by pidlink wave: egen maxyear = max(chbyr)

*** assign max livebirth year in previous wave
* identify first wave
by pidlink: egen minwave = min(wave)
* identify subsequent waves
* NOTE(review): only rows with prgid==1 are flagged.  Because non-livebirths
* and dead children were dropped above, a later wave whose prgid==1 row was
* dropped is never flagged and none of its rows are removed below -- confirm
* this is intended.
gen newwave = prgid==1 & wave>minwave
* obtain last wave's max livebirth year
* (takes maxyear from the row immediately above in the current sort order)
gen lmaxyear = maxyear[_n-1] if newwave==1
* assign last wave's max livebirth year in current wave
by pidlink wave: egen maxlmaxyear = max(lmaxyear)

*** drop pregnancies occur earlier than last livebirth pregnancy in previous wave
* (<= also drops births in the same year as the previous wave's latest birth)
drop if chbyr <= maxlmaxyear & maxlmaxyear!=.
* drop 4,995 observations

*** clean constructed variables
drop _merge wavenum lmaxyear
la var wavenum_ "number of waves tracked"
la var maxyear "last year of livebirth in current wave"
la var minwave "first wave found"
la var newwave "1=indicate first year of subsequent wave"
la var maxlmaxyear "last year of livebirth in previous wave"

save "$clean/IFLS/Household/livebirth pregnancies",replace

***************************************************************************
***PART 10: count number of kids
***************************************************************************
* From the child-level live-birth file, build a mother-by-year panel
* (1988-2015) with the number of children in each age band and at each
* single age, plus 0/1 "has any" versions of every count.

use "$clean/IFLS/Household/livebirth pregnancies", clear

sort pidlink prgid

***** age of each kid in every year 1988-2015 (missing before birth)
forvalues yr = 1988/2015 {
	gen kidage_`yr' = cond(`yr' < chbyr, ., `yr' - chbyr)
}

***** dummies for each age band: "<suffix> <lowest age> <highest age>"
foreach band in "02 0 2" "35 3 5" "36 3 6" "618 6 18" "718 7 18" "099 0 99" {
	gettoken tag bounds : band
	gettoken lo hi : bounds
	forvalues yr = 1988/2015 {
		gen kid`tag'_`yr' = inrange(kidage_`yr', `lo', `hi')
	}
}

***** dummies for each single age 0-17 and for 18 or older
* (created last and year by year: the collapse below addresses them as the
*  variable range kid0_1988-kid18_2015)
forvalues yr = 1988/2015 {
	forvalues a = 0/17 {
		gen kid`a'_`yr' = (kidage_`yr' == `a')
	}
	gen kid18_`yr' = (kidage_`yr' != .) & (kidage_`yr' >= 18)
}

***** collapse data to mother level (currently child level)
collapse (sum) kid02* kid35* kid36* kid618* kid718* kid099* kid0_1988-kid18_2015, by(pidlink)
/* check pidlink: 001220002
all good
*/

***** reshape each year as a row
local kstubs kid02_ kid35_ kid36_ kid618_ kid718_ kid099_
forvalues a = 0/18 {
	local kstubs `kstubs' kid`a'_
}
reshape long `kstubs', i(pidlink) j(year)

* drop the trailing underscore left over from the stubs
rename kid*_ kid*

***** label the counts and add a dummy for any nonzero child
* age bands
foreach g in 02 35 36 618 718 099 {
	la var kid`g' "Total kids between age `g'"
	gen kid`g'_ = kid`g'>0 & kid`g'!=.
	la var kid`g'_ "Have kids between age `g'"
}
* single ages (18 is relabelled as 18+ right after the loop)
forvalues a = 0/18 {
	la var kid`a' "Total kids age `a'"
	gen kid`a'_ = kid`a'>0 & kid`a'!=.
	la var kid`a'_ "Have kids age `a'"
}
la var kid18 "Total kids age 18+"
la var kid18_ "Have kids age 18+"

save "$clean/IFLS/Household/Kids in age group", replace

***************************************************************************
***PART 11: Keep only first borns
***************************************************************************
* Mother-by-year panel (1988-2015) describing the age of the first-born:
* counts by single age (0-17, 18+) and matching 0/1 indicators.

use "$clean/IFLS/Household/livebirth pregnancies", clear

sort pidlink chbyr

**** keep only first born: rows whose birth year equals the mother's earliest
**** recorded birth year (rows sharing that year are all kept)
by pidlink: egen minbirth = min(chbyr)
keep if chbyr==minbirth

***** age of the kid in every year 1988-2015 (missing before birth) and
***** dummies for each single age 0-17 and for 18 or older
forvalues yr = 1988/2015 {
	gen kidage_`yr' = cond(`yr' < chbyr, ., `yr' - chbyr)
	forvalues a = 0/17 {
		gen firstkid`a'_`yr' = (kidage_`yr' == `a')
	}
	gen firstkid18_`yr' = (kidage_`yr' != .) & (kidage_`yr' >= 18)
}

***** collapse data to mother level (currently child level)
collapse (sum) firstkid*, by(pidlink)

***** reshape each year as a row
local fstubs
forvalues a = 0/18 {
	local fstubs `fstubs' firstkid`a'_
}
reshape long `fstubs', i(pidlink) j(year)

* drop the trailing underscore left over from the stubs
rename firstkid*_ firstkid*

***** label the counts and add a dummy for any nonzero count
* (18 is relabelled as 18+ right after the loop)
forvalues a = 0/18 {
	la var firstkid`a' "Total first kids age `a'"
	gen firstkid`a'_ = firstkid`a'>0 & firstkid`a'!=.
	la var firstkid`a'_ "Have first kids age `a'"
}
la var firstkid18 "Total first kids age 18+"
la var firstkid18_ "Have first kids age 18+"

save "$clean/IFLS/Household/First kid in age group", replace

***************************************************************************
***PART 12: Define eligibility by birth order
***************************************************************************
* Mother-by-year panel (1988-2015) with, for each of the first six births,
* a dummy for that child being in each age band.  Dummies for birth orders
* the mother never reached are set to missing.

use "$clean/IFLS/Household/livebirth pregnancies", clear

* birth order = position of the child when a mother's births are sorted by
* birth year
sort pidlink chbyr
by pidlink: gen birthorder = _n
order birthorder, after(prgid)
by pidlink: gen maxbirthorder = _N
sum maxbirthorder if birthorder==1, d

***** age of each kid in every year 1988-2015 (missing before birth)
forvalues yr = 1988/2015 {
	gen kidage_`yr' = cond(`yr' < chbyr, ., `yr' - chbyr)
}

***** age-band dummies per birth order; each is defined only on the row of
***** the child with that birth order
* band = "<suffix> <lowest age> <highest age>"
forvalues b = 1/6 {
	foreach band in "02 0 2" "35 3 5" "36 3 6" "618 6 18" "718 7 18" {
		tokenize `band'
		forvalues yr = 1988/2015 {
			gen kid`1'_`b'_`yr' = inrange(kidage_`yr', `2', `3') if birthorder==`b'
		}
	}
}

***** collapse data to mother level (currently child level)
* birthorder becomes the mother's highest birth order
collapse (sum) kid02* kid35* kid36* kid618* kid718* (max) birthorder, by(pidlink)
/* check pidlink: 001220002
all good
*/

***** reshape each year as a row
local ostubs
forvalues b = 1/6 {
	foreach tag in 02 35 36 618 718 {
		local ostubs `ostubs' kid`tag'_`b'_
	}
}
reshape long `ostubs', i(pidlink) j(year)

* kid<band>_<order>_  ->  kid<order>_age<band>
forvalues b = 1/6 {
	rename kid*_`b'_ kid`b'_age*
}

*** cap at 1 so each variable is a dummy, set to missing when the mother has
*** fewer than `b' births, and label
forvalues b = 1/6 {
	foreach tag in 02 35 36 618 718 {
		replace kid`b'_age`tag' = 1 if kid`b'_age`tag'>1
		replace kid`b'_age`tag' = . if birthorder<`b'
		la var kid`b'_age`tag' "Kid `b' between age `tag'"
	}
}

save "$clean/IFLS/Household/Kid order in age group", replace

***************************************************************************
***PART 13: Individual fertility
***************************************************************************
* One row per mother: number of retained live births (rows with non-missing
* prgid), earliest and latest birth year, and her own birth year.

use "$clean/IFLS/Household/livebirth pregnancies", clear
collapse (count)   child      = prgid ///
         (min)     firstbirth = chbyr ///
         (max)     lastbirth  = chbyr ///
         (firstnm) bth_year, by(pidlink)
save "$clean/IFLS/Household/Individual fertility", replace

***************************************************************************
***PART 14: Keep only last kid
***************************************************************************
* Mother-by-year panel (1988-2015) describing the age of the last-born:
* counts by single age (0-17, 18+) and matching 0/1 indicators.

use "$clean/IFLS/Household/livebirth pregnancies", clear

sort pidlink chbyr

**** keep only last born: rows whose birth year equals the mother's latest
**** recorded birth year (rows sharing that year are all kept)
by pidlink: egen maxbirth = max(chbyr)
keep if chbyr==maxbirth

***** age of the kid in every year 1988-2015 (missing before birth) and
***** dummies for each single age 0-17 and for 18 or older
forvalues yr = 1988/2015 {
	gen kidage_`yr' = cond(`yr' < chbyr, ., `yr' - chbyr)
	forvalues a = 0/17 {
		gen lastkid`a'_`yr' = (kidage_`yr' == `a')
	}
	gen lastkid18_`yr' = (kidage_`yr' != .) & (kidage_`yr' >= 18)
}

***** collapse data to mother level (currently child level)
collapse (sum) lastkid*, by(pidlink)

***** reshape each year as a row
local lstubs
forvalues a = 0/18 {
	local lstubs `lstubs' lastkid`a'_
}
reshape long `lstubs', i(pidlink) j(year)

* drop the trailing underscore left over from the stubs
rename lastkid*_ lastkid*

***** label the counts and add a dummy for any nonzero count
* (18 is relabelled as 18+ right after the loop)
forvalues a = 0/18 {
	la var lastkid`a' "Total last kids age `a'"
	gen lastkid`a'_ = lastkid`a'>0 & lastkid`a'!=.
	la var lastkid`a'_ "Have last kids age `a'"
}
la var lastkid18 "Total last kids age 18+"
la var lastkid18_ "Have last kids age 18+"

save "$clean/IFLS/Household/Last kid in age group", replace


***************************************************************************
***PART 15: Merge intergenerational panel with Podes preschool data
***************************************************************************
* Attach district-level kindergarten variables from PODES to the person-year
* work history and pick, for every year, the value of the PODES round
* assigned to that year by the match rule below.

clear all
set maxvar 10000
set matsize 10000

* obtain count of preschools in IFLS intergenerational panel regions
use "$clean/IFLS/Household/revised work history", clear

*** merge kindergarten data from PODES
* the PODES file is keyed on kabcode_des93, so rename temporarily
rename kabcurrent kabcode_des93
merge m:1 kabcode_des93 using "$clean/PODES/kindergarten across podes"
* inspect person-years whose district is not in the PODES file
tab work kabcode_des93 if _merge==1, m
keep if _merge==3
drop _merge
rename kabcode_des93 kabcurrent
la var kabcurrent "current district of residence based on location in survey years"

*** infer preschool availability according to wave match rule
local podes 90 93 96 00 03 05 08 11 14
* pool public and private kindergartens
foreach rd of local podes {
	gen kinderall`rd' = kindergov`rd' + kinderpvt`rd'
	la var kinderall`rd' "Kindergarten schools: All"
}

*** new match rule
* every rule is "<PODES round> <first year> <last year>"; years up to 1990
* use the 1990 round
foreach var in kinderall kinderpvt kindergov cdens {
	gen `var' = .
	replace `var' = `var'90 if year<=1990
	foreach rule in "93 1991 1993" "96 1994 1996" "00 1997 2000" ///
		"03 2001 2003" "05 2004 2005" "08 2006 2008" ///
		"11 2009 2011" "14 2012 2015" {
		tokenize `rule'
		replace `var' = `var'`1' if inrange(year,`2',`3')
	}
}

* drop the round-specific variables once the match rule has been applied
foreach var in kinderall kinderpvt kindergov cdens {
	drop `var'90 `var'93 `var'96 `var'00 `var'03 `var'05 `var'08 `var'11 `var'14
}

**** merge PODES linear projection (only matched district-years are kept)
rename kabcurrent kabcode_des93
merge m:1 kabcode_des93 year using "$clean/PODES/preschool density imputed", ///
	keep(match) nogenerate
rename kabcode_des93 kabcurrent

***************************************************************************
***PART 16: Identify number of kids in age groups
***************************************************************************

* Attach the person-year files describing children. Every merge keeps only
* matched rows, so each step can shrink the sample.
merge 1:1 pidlink year using "$clean/IFLS/Household/Kids in age group"
unique pidlink if _merge==1
/* 1241 individuals unmerged
e.g. pidlink=="001240005" was not found even in the 5-waves appended version of
cleaned B4CH1
tab kid if _merge==1
only 15 obs with non-zero marginal kid
*/
keep if _merge==3 // requires that the sample includes females who at some point becomes mothers
drop _merge

**** merge birth year of first kid
merge 1:1 pidlink year using "$clean/IFLS/Household/First kid in age group"
keep if _merge==3
drop _merge

**** merge birth year of last kid
merge 1:1 pidlink year using "$clean/IFLS/Household/Last kid in age group"
keep if _merge==3
drop _merge

**** merge kid by birth order
merge 1:1 pidlink year using "$clean/IFLS/Household/Kid order in age group"
keep if _merge==3
drop _merge

**** merge individual fertility
* one row per pidlink (child count, first/last birth year, bth_year)
merge m:1 pidlink using "$clean/IFLS/Household/Individual fertility"
keep if _merge==3
drop _merge


*** define number of kindergartens per 1000 children
foreach x in all pvt gov {
	gen nkinder`x' = kinder`x'/cdens*1000
}

** set data structure
* pidlink is a string; xtset needs a numeric panel identifier
encode pidlink, gen(id)
la var id "individual"
xtset id year
gen prov = int(kabcurrent/100)

** district time FE
* Must be stored as long, not the default float: a float represents integers
* exactly only up to 16,777,216, and kabcurrent*10000+year exceeds that for
* any district code of 1678 or above. With float storage, neighbouring years
* of the same district would round to the same value and share one identifier.
gen long kabyr = kabcurrent*10000+year

*** mother's age
gen age = year - bth_year

*** define age of first birth
gen age_birth1 = firstbirth - bth_year

*** define age of first kid
gen age_firstkid = year - firstbirth

*** define age of last kid
gen age_lastkid = year - lastbirth

**** define if any kid is:
* rowtotal() treats missing as zero, so the dummies below are never missing
* age 7-12
egen kid712 = rowtotal(kid7-kid12)
gen kid712_ = kid712>0
la var kid712 "Total any kid 7-12"
la var kid712_ "Dummy any kid 7-12"
* age 13-18
egen kid1318 = rowtotal(kid13-kid18)
gen kid1318_ = kid1318>0
la var kid1318 "Total kid 13-18"
la var kid1318_ "Dummy kid 13-18"

***** Method 3: Number of preschool & Dummy for child presence
* Naming used below:
*   tk_kid<type><band>     = nkinder<type> * kid<band>_  (has-kid dummy)
*   tkdens_kid<type><band> = tkdens<type>  * kid<band>_
*   tkkid<type><band>      = nkinder<type> * kid<band>   (number of kids)
*   tkdenskid<type><band>  = tkdens<type>  * kid<band>
* <type> is all/pvt/gov for nkinder and pvt/gov for tkdens.

* interaction terms has kid
capt drop tk_kid*
foreach y in all pvt gov {
	foreach x in 02 35 36 618 718 {
		gen tk_kid`y'`x' = nkinder`y' * kid`x'_
		la var tk_kid`y'`x' "Num. kindergartens * has kid `x'"
	}
}

foreach y in pvt gov {
	foreach x in 02 35 36 618 718 712 1318 {
		gen tkdens_kid`y'`x' = tkdens`y' * kid`x'_
		la var tkdens_kid`y'`x' "Preschool density * has kid `x'"
	}
}

* label variables
la var kid02_ "Has kid age 0-2"
la var kid35_ "Has kid age 3-5"
la var kid36_ "Has kid age 3-6"
la var kid618_ "Has kid age 6-18"
* fixed: this label previously read "6-18", duplicating the kid618_ label
la var kid718_ "Has kid age 7-18"
la var kid099_ "Has kid age 0-99"

* final labels with readable age bands (replace the provisional ones above)
foreach y in all pvt gov {
	la var tk_kid`y'02 "Num. kindergartens * has kid 0-2"
	la var tk_kid`y'35 "Num. kindergartens * has kid 3-5"
	la var tk_kid`y'36 "Num. kindergartens * has kid 3-6"
	la var tk_kid`y'618 "Num. kindergartens * has kid 6-18"
	la var tk_kid`y'718 "Num. kindergartens * has kid 7-18"
	* labelled here so that nkinderall is covered as well as pvt and gov
	la var nkinder`y' "Num. kindergartens per 1000 children"
}
foreach y in pvt gov {
	la var tkdens_kid`y'02 "Preschool density * has kid 0-2"
	la var tkdens_kid`y'35 "Preschool density * has kid 3-5"
	la var tkdens_kid`y'36 "Preschool density * has kid 3-6"
	la var tkdens_kid`y'618 "Preschool density * has kid 6-18"
	la var tkdens_kid`y'718 "Preschool density * has kid 7-18"
	la var tkdens_kid`y'712 "Preschool density * has kid 7-12"
	la var tkdens_kid`y'1318 "Preschool density * has kid 13-18"
}
la var work "Work"

* interaction terms num kids
foreach y in all pvt gov {
	foreach x in 02 35 36 618 718 {
		gen tkkid`y'`x' = nkinder`y' * kid`x'
		la var tkkid`y'`x' "Num. kindergartens * num. kid `x'"
	}
}
foreach y in pvt gov {
	foreach x in 02 35 36 618 718 712 1318 {
		gen tkdenskid`y'`x' = tkdens`y' * kid`x'
		la var tkdenskid`y'`x' "Preschool density * num. kid `x'"
	}
}
* label variables
la var kid02 "Num. kid age 0-2"
la var kid35 "Num. kid age 3-5"
la var kid36 "Num. kid age 3-6"
la var kid618 "Num. kid age 6-18"
la var kid718 "Num. kid age 7-18"
foreach y in all pvt gov {
	la var tkkid`y'02 "Num. kindergartens * num. kid 0-2"
	la var tkkid`y'35 "Num. kindergartens * num. kid 3-5"
	la var tkkid`y'36 "Num. kindergartens * num. kid 3-6"
	la var tkkid`y'618 "Num. kindergartens * num. kid 6-18"
	la var tkkid`y'718 "Num. kindergartens * num. kid 7-18"
}
foreach y in pvt gov {
	la var tkdenskid`y'02 "Preschool density * num. kid 0-2"
	la var tkdenskid`y'35 "Preschool density * num. kid 3-5"
	la var tkdenskid`y'36 "Preschool density * num. kid 3-6"
	la var tkdenskid`y'618 "Preschool density * num. kid 6-18"
	la var tkdenskid`y'718 "Preschool density * num. kid 7-18"
	la var tkdenskid`y'712 "Preschool density * num. kid 7-12"
	la var tkdenskid`y'1318 "Preschool density * num. kid 13-18"
}

**** interaction terms with kid birth order
* kindergartens per 1000 children x birth-order j kid in age band x;
* the 3-5 and 3-6 bands carry the short "<sector> * Eligible" label
foreach y in all pvt gov {
	local sector = cond("`y'"=="pvt", "Private", cond("`y'"=="gov", "Public", "All"))
	forvalues j = 1/6 {
		foreach x in 02 35 36 618 {
			gen tk_kid`j'_`y'`x' = nkinder`y' * kid`j'_age`x'
			if inlist("`x'", "35", "36") {
				label variable tk_kid`j'_`y'`x' "`sector' * Eligible"
			}
			else {
				label variable tk_kid`j'_`y'`x' "Num. kindergartens * has kid `x'"
			}
		}
	}
}
* same interactions using the preschool density measure (pvt/gov only)
foreach y in pvt gov {
	forvalues j = 1/6 {
		foreach x in 02 35 36 618 {
			gen tkdens_kid`j'_`y'`x' = tkdens`y' * kid`j'_age`x'
			label variable tkdens_kid`j'_`y'`x' "Preschool density * has kid `x'"
		}
	}
}

**** define if first/last kid falls in an age band
* bands are processed in this order: 3-6 (preschool), 0-2, 7-12, 13-18
local bands   36 02 712 1318
local lo_ages 3  0  7   13
local hi_ages 6  2  12  18
forvalues b = 1/4 {
	local band : word `b' of `bands'
	local lo   : word `b' of `lo_ages'
	local hi   : word `b' of `hi_ages'
	local desc = cond("`band'"=="36", "preschool-aged", "`lo'-`hi'")
	foreach x in first last {
		* sum of the single-age counts inside the band, then a 0/1 flag
		egen `x'kid`band' = rowtotal(`x'kid`lo'-`x'kid`hi')
		gen `x'kid`band'_ = `x'kid`band' > 0
		label variable `x'kid`band' "Total `x' kid `desc'"
		label variable `x'kid`band'_ "Dummy `x' kid `desc'"
	}
}

******** interaction with preschool
local fl_bands 02 36 712 1318
foreach z in first last {
	foreach y in all pvt gov {
		foreach x of local fl_bands {
			gen tk_`z'kid`y'`x' = nkinder`y' * `z'kid`x'_
			label variable tk_`z'kid`y'`x' "Num. kindergartens * `z' kid `x'"
		}
	}
}
foreach z in first last {
	foreach y in pvt gov {
		foreach x of local fl_bands {
			gen tkdens_`z'kid`y'`x' = tkdens`y' * `z'kid`x'_
			label variable tkdens_`z'kid`y'`x' "Preschool density * `z' kid `x'"
		}
	}
}


**************** Standardization

*** z-scores
* each *_om outcome is centred on its sample mean and divided by its
* sample standard deviation; the result is stored as z_<outcome>_om
local om_stems work sidejob selfemp govwork pvtwork famwork ///
	professional manager clerk sales service agricultural production ///
	agriculture mining manufacturing utility construction ///
	trade logistic finance social
foreach s of local om_stems {
	summarize `s'_om
	gen z_`s'_om = (`s'_om-`r(mean)')/`r(sd)'
}

******************* Define occupation labels
* occ is a 2-character string code; non-numeric codes are mapped to numbers
* first so that the variable can be converted and value-labelled
gen occlabel = occ
replace occlabel = "39" if occlabel=="3X"
replace occlabel = "49" if occlabel=="4X"
replace occlabel = "86" if occlabel=="7X" // there's no unclassified production worker, put in smallest freq: sound equipment operator
replace occlabel = "94" if occlabel=="9X"
replace occlabel = "100" if inlist(occlabel,"M1","M2","MM")
replace occlabel = "200" if occlabel=="SS"
replace occlabel = "300" if inlist(occlabel,"X2","XX","ZZ")
destring occlabel, replace

* value label, defined one leading digit at a time
* 0x
label define occlabel ///
	0 "Professional, category 0, second digit could not be assigned" ///
	1 "Physical scientists and related technicians" ///
	2 "Architects, engineers, technologists" ///
	3 "Surveyors, draftsmen, engineering assistants" ///
	4 "Aircraft and ship's officer" ///
	5 "Life scientists and related technicians" ///
	6 "Physicians, medical assistants, dentists, dental assistants, pharmacists, nutritionists" ///
	7 "Nurses, midwives, x-ray technicians, traditional medicine" ///
	8 "Statisticians, mathematicians, system analysts and related technicians" ///
	9 "Economists"
* 1x
label define occlabel ///
	10 "Professional, category 1, second digit could not be assigned" ///
	11 "Accountants and auditors" ///
	12 "Jurists" ///
	13 "Teachers" ///
	14 "Workers in religion" ///
	15 "Authors, critics, journalists, editors and related writers" ///
	16 "Sculptors, painters, photographers and related creative artists" ///
	17 "Composers, performing artists" ///
	18 "Athletes, sportsmen and related workers" ///
	19 "Professional and technical workers not elsewhere classified", add
* 2x
label define occlabel ///
	20 "Legislative officials and government administrators" ///
	21 "Managers" ///
	26 "Administrator unknown" ///
	27 "Administrator, government" ///
	28 "Administrator, non government" ///
	29 "Manager not elsewhere classified (mostly school principals)", add
* 3x
label define occlabel ///
	30 "Clerical supervisors" ///
	31 "Government executive of officials" ///
	32 "Stenographers, typists and card tape-punching machine operators" ///
	33 "Bookkeepers, cashiers, and related workers" ///
	34 "Computing machine operators" ///
	35 "Transport and communications supervisors" ///
	36 "Transport conductors" ///
	37 "Mail distributors and related workers" ///
	38 "Telephone and telegraph operators" ///
	39 "Clerical and related workers not elsewhere classified", add
* 4x
label define occlabel ///
	40 "Managers (wholesale and retail trade)" ///
	41 "Working proprietors (wholesale and retail trade)" ///
	42 "Sales supervisors and buyers" ///
	43 "Technical salesman, commercial travellers, manufacturer's agents" ///
	44 "Insurance, real estate, securities and business services salesman and auctioneers" ///
	45 "Salesmen, shop assistants and related workers" ///
	48 "Sales agent" ///
	49 "Sales workers not elsewhere classified", add
* 5x
label define occlabel ///
	50 "Managers (catering and lodging services)" ///
	51 "Working propriators (catering and lodging services)" ///
	52 "Housekeeping and related service supervisors" ///
	53 "Cooks, waiters, bartenders and related workers" ///
	54 "Maids and related housekeeping service workers NEC" ///
	55 "Building caretakers, charworkers, cleaners and related workers" ///
	56 "Launderers, dry-cleaners and pressers" ///
	57 "Hairdressers, barbers, beauticians and related workers" ///
	58 "Protective service workers" ///
	59 "Service workers not elsewhere classified", add
* 6x
label define occlabel ///
	60 "Plantation managers and supervisors" ///
	61 "Planters and farmers" ///
	62 "Agricultural and animal husbandry workers" ///
	63 "Forestry workers" ///
	64 "Fishermen, hunters, and related workers" ///
	69 "Agricultural worker not elsewhere classified", add
* 7x
label define occlabel ///
	70 "Production supervisors and general foremen" ///
	71 "Miners, quarrymen, well drillers and related workers" ///
	72 "Metal processers" ///
	73 "Wood preparation workers and paper makers" ///
	74 "Chemical processers and related workers" ///
	75 "Spinners, weavers, knitters, dyers, and related workers" ///
	76 "Tanners, fellmongers and pelt dressers" ///
	77 "Food and beverage processors" ///
	78 "Tobacco preparers and tobacco product makers" ///
	79 "Tailors, dressmakers, sewer, upholsterers and related workers", add
* 8x
label define occlabel ///
	80 "Shoemakers and leather good makers" ///
	81 "Cabinet makers and related wood makers" ///
	82 "Stone cutters and carvers" ///
	83 "Blacksmith, tool makers and machine tool operators" ///
	84 "Machinery fitters, assemblers, repairers and precision instrument makers (except electrical)" ///
	85 "Electrical fitters and related electrical and electronics workers" ///
	86 "Broadcasting station, sound equipment operators and cinema projectionists" ///
	87 "Plumbers, welders, sheet-metal and structural metal preparers and erectors" ///
	88 "Jewelry and precious metal workers" ///
	89 "Glass formers, potters and related workers", add
* 9x
label define occlabel ///
	90 "Rubber and plastics product makers" ///
	91 "Paper board products makers" ///
	92 "Printers and related workers" ///
	93 "Painters" ///
	94 "Production and related workers not elsewhere classified" ///
	95 "Bricklayers, carpenters and other construction workers" ///
	96 "Stationary engines and related equipment operators" ///
	97 "Material handling and related equipment, operators dockers and freight handlers" ///
	98 "Transport equipment operators" ///
	99 "Laborers not elsewhere classified", add
* codes assigned above to the non-numeric categories
label define occlabel ///
	100 "Military" ///
	200 "Student" ///
	300 "Unlabeled", add
label values occlabel occlabel

***************************************************************************
***PART 17: Define areas with higher than median number of preschools
***************************************************************************

* For each kindergarten type: the within-year median of kindergartens per
* 1000 children over all observations in that year, and a flag for lying
* strictly above it (missing whenever either input is missing)
foreach x in all pvt gov {
	bysort year: egen mednkinder`x' = median(nkinder`x')
	label variable mednkinder`x' "Median number of kindergartens per 1000 children"
	gen hikinder`x' = (nkinder`x' > mednkinder`x') if nkinder`x'!=. & mednkinder`x'!=.
	label variable hikinder`x' "High number of kindergartens"
}

***************************************************************************
***PART 18: Define other outcome variables
***************************************************************************
* working copies: wrkstat1 duplicates workstat, ind1 is ind converted from
* string to numeric
gen wrkstat1=workstat
destring ind, gen(ind1)

*** outcome variables
* Every dummy below is missing when its source variable is missing.
local has_status !missing(wrkstat1)
* self-employment: status codes up to 3
gen selfemp = (wrkstat1<=3) if `has_status'
* casual employment: status codes 7 and 8
gen casual = (wrkstat1>=7 & wrkstat1<=8) if `has_status'
* unpaid employment: status code 6
gen unpaid = (wrkstat1==6) if `has_status'

* three industry indicators (industry codes 1, 3 and 6)
local ind_names iagriculture manufacturing retail
local ind_codes 1 3 6
forvalues k = 1/3 {
	local nm : word `k' of `ind_names'
	local cd : word `k' of `ind_codes'
	gen `nm' = (ind1==`cd') if !missing(ind1)
}

* Replace the string occ by a 1-digit occupation group taken from the
* leading digit(s) of the 2-digit code; groups 0, 8 and 9 are folded into
* groups 1, 7 and 7 respectively
drop occ
label define occ_ 1 "Professional/Technical" 2 "Administrative and Managerial" ///
	3 "Clerical and related workers" 4 "Sales Workers" 5 "Service Workers" ///
	6 "Agricultural, Animal Husbandry, Forestry workers, Fisherman and Hunters" ///
	7 "Production and related workers, Transport Operators, and Laborers", modify
gen occ = int(occlabel/10)
recode occ (0=1) (8 9=7)
label values occ occ_

* four occupation indicators (groups 4 to 7, in this order)
local grp = 4
foreach nm in sales service oagriculture production {
	gen `nm' = (occ==`grp') if !missing(occ)
	local ++grp
}
*
*
***** label variables
* Each outcome comes in up to three versions:
*   _om "(otherwise missing)", _on "(otherwise no)",
*   _c  "(conditional on work)" - not available for work and sidejob.

* outcomes whose label text differs from the variable stem
local lbl_work    "Work participation"
local lbl_sidejob "Has a side job"
local lbl_selfemp "Self-employed"
local lbl_govwork "Government employee"
local lbl_pvtwork "Private employee"
local lbl_famwork "Family worker"

* occupation and industry outcomes: label text is the capitalised stem
local occ_ind professional manager clerk sales service agricultural production ///
	agriculture mining manufacturing utility construction ///
	trade logistic finance social
foreach s of local occ_ind {
	local lbl_`s' = proper("`s'")
}

foreach s in work sidejob selfemp govwork pvtwork famwork `occ_ind' {
	label variable `s'_om "`lbl_`s'' (otherwise missing)"
	label variable `s'_on "`lbl_`s'' (otherwise no)"
	if !inlist("`s'", "work", "sidejob") {
		label variable `s'_c "`lbl_`s'' (conditional on work)"
	}
}

****** choose most likely years of schooling
* Result: modeyos, one value per pidlink, merged back onto the panel.
*   1. among reports made at age 22 or older, take the most frequent value
*      (smallest value when several are tied);
*   2. if there is no such report, take the maximum of all reports.
*** work with tempfile
preserve
keep pidlink yos* bth_year
* drop the plain yos variable so that reshape can recreate it from the
* wave-specific columns
drop yos
* suffix = 4-digit year, which becomes the reshape j variable
ren yos93 yos1993
ren yos97 yos1997
ren yos00 yos2000
ren yos07 yos2007
ren yos14 yos2014
duplicates drop
reshape long yos, i(pidlink) j(year)
** admissible education: still getting more education till age 22
* A missing bth_year must be excluded explicitly: in Stata a missing value
* compares larger than any number, so "year - bth_year>=22" alone is TRUE
* when the birth year is unknown and would admit every report.
gen yos_ = yos if year - bth_year>=22 & !missing(bth_year)
** step 1: get mode of years of education
egen modeyos = mode(yos_), by(pidlink) minmode // in case of multi modes --> choose min value
** step 2: get max years of education if no admissible education
egen maxyos = max(yos), by(pidlink)
** step 3: put max years of education into mode if missing
replace modeyos = maxyos if modeyos==.
** keep mode yos
keep pidlink modeyos
duplicates drop
tempfile modeyos
save `modeyos'
restore
*** merge temp file
merge m:1 pidlink using `modeyos'
drop _merge
la var modeyos "Years of education"

***************************************************************************
***PART 19: Combine with marriage history
***************************************************************************

egen tagid = tag(pidlink)
unique pidlink // # mothers = 10340

* First source: bring in the marriage variables, keeping every master row
merge m:1 pidlink using "$ifls_clean1/b4_kw2", keep(1 3) ///
	keepusing(firstwed_yr firstwed_age) nogenerate

* Sources 2 to 5: with -update-, values still missing in the master are
* filled from the using file and non-missing values are left untouched;
* unmatched using rows are discarded
forvalues w = 2/5 {
	merge m:1 pidlink using "${ifls_clean`w'}/b4_kw2", update keep(1 3 4 5) ///
		keepusing(firstwed_yr firstwed_age) nogenerate
}
drop tagid

***************************************************************************
***PART 20: Save
***************************************************************************
*** add podes years dummies
* 1 in the years listed below (the PODES rounds used in the match rule), 0 otherwise
gen podes=inlist(year,1990,1993,1996,2000,2003,2005,2008,2011,2014)

save "$clean/preschool", replace
